{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the ACM dataset from a MATLAB .mat file into `mat_file`.\n",
    "from scipy import io\n",
    "import numpy as np\n",
    "from scipy.sparse import csr_matrix\n",
    "# NOTE(review): np and csr_matrix are not used in this cell; presumably needed by later cells - confirm.\n",
    "# io.loadmat returns a dict keyed by MATLAB variable name. The relative path\n",
    "# 'ACM.mat' is resolved against the kernel's current working directory.\n",
    "mat_file = io.loadmat('ACM.mat')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN64, Created on: Mon Aug 08 18:23:50 2011',\n",
       " '__version__': '1.0',\n",
       " '__globals__': [],\n",
       " 'TvsP': <1903x12499 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 972973 stored elements in Compressed Sparse Column format>,\n",
       " 'PvsA': <12499x17431 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 37055 stored elements in Compressed Sparse Column format>,\n",
       " 'PvsV': <12499x196 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 12499 stored elements in Compressed Sparse Column format>,\n",
       " 'AvsF': <17431x1804 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 30424 stored elements in Compressed Sparse Column format>,\n",
       " 'VvsC': <196x14 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 196 stored elements in Compressed Sparse Column format>,\n",
       " 'PvsL': <12499x73 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 12499 stored elements in Compressed Sparse Column format>,\n",
       " 'PvsC': <12499x14 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 12499 stored elements in Compressed Sparse Column format>,\n",
       " 'A': array([[array(['Raluca Paiu'], dtype='<U11')],\n",
       "        [array(['Panagiotis Karras'], dtype='<U17')],\n",
       "        [array(['Peter Bumbulis'], dtype='<U14')],\n",
       "        ...,\n",
       "        [array(['Zeyu Zheng'], dtype='<U10')],\n",
       "        [array(['Volker Roth'], dtype='<U11')],\n",
       "        [array(['Volker Roth'], dtype='<U11')]], dtype=object),\n",
       " 'C': array([[array(['KDD'], dtype='<U3')],\n",
       "        [array(['SIGMOD'], dtype='<U6')],\n",
       "        [array(['WWW'], dtype='<U3')],\n",
       "        [array(['SIGIR'], dtype='<U5')],\n",
       "        [array(['CIKM'], dtype='<U4')],\n",
       "        [array(['SODA'], dtype='<U4')],\n",
       "        [array(['STOC'], dtype='<U4')],\n",
       "        [array(['SOSP'], dtype='<U4')],\n",
       "        [array(['SPAA'], dtype='<U4')],\n",
       "        [array(['SIGCOMM'], dtype='<U7')],\n",
       "        [array(['MobiCOMM'], dtype='<U8')],\n",
       "        [array(['ICML'], dtype='<U4')],\n",
       "        [array(['COLT'], dtype='<U4')],\n",
       "        [array(['VLDB'], dtype='<U4')]], dtype=object),\n",
       " 'F': array([[array(['Ecole Polytechnique'], dtype='<U19')],\n",
       "        [array(['Aalborg University'], dtype='<U18')],\n",
       "        [array(['Aarhus University'], dtype='<U17')],\n",
       "        ...,\n",
       "        [array(['Ludwig-Maximilians-Universität München'], dtype='<U38')],\n",
       "        [array(['IBM Watson Research Center'], dtype='<U26')],\n",
       "        [array(['National Chemical Laboratory'], dtype='<U28')]],\n",
       "       dtype=object),\n",
       " 'L': array([[array(['A.0'], dtype='<U3')],\n",
       "        [array(['A.1'], dtype='<U3')],\n",
       "        [array(['A.m'], dtype='<U3')],\n",
       "        [array(['B.2'], dtype='<U3')],\n",
       "        [array(['B.3'], dtype='<U3')],\n",
       "        [array(['B.4'], dtype='<U3')],\n",
       "        [array(['B.5'], dtype='<U3')],\n",
       "        [array(['B.6'], dtype='<U3')],\n",
       "        [array(['B.7'], dtype='<U3')],\n",
       "        [array(['B.8'], dtype='<U3')],\n",
       "        [array(['C.0'], dtype='<U3')],\n",
       "        [array(['C.1'], dtype='<U3')],\n",
       "        [array(['C.2'], dtype='<U3')],\n",
       "        [array(['C.3'], dtype='<U3')],\n",
       "        [array(['C.4'], dtype='<U3')],\n",
       "        [array(['C.5'], dtype='<U3')],\n",
       "        [array(['C.m'], dtype='<U3')],\n",
       "        [array(['D.0'], dtype='<U3')],\n",
       "        [array(['D.1'], dtype='<U3')],\n",
       "        [array(['D.2'], dtype='<U3')],\n",
       "        [array(['D.3'], dtype='<U3')],\n",
       "        [array(['D.4'], dtype='<U3')],\n",
       "        [array(['D.m'], dtype='<U3')],\n",
       "        [array(['E.0'], dtype='<U3')],\n",
       "        [array(['E.1'], dtype='<U3')],\n",
       "        [array(['E.2'], dtype='<U3')],\n",
       "        [array(['E.3'], dtype='<U3')],\n",
       "        [array(['E.4'], dtype='<U3')],\n",
       "        [array(['E.5'], dtype='<U3')],\n",
       "        [array(['E.m'], dtype='<U3')],\n",
       "        [array(['F.0'], dtype='<U3')],\n",
       "        [array(['F.1'], dtype='<U3')],\n",
       "        [array(['F.2'], dtype='<U3')],\n",
       "        [array(['F.3'], dtype='<U3')],\n",
       "        [array(['F.4'], dtype='<U3')],\n",
       "        [array(['F.m'], dtype='<U3')],\n",
       "        [array(['G.0'], dtype='<U3')],\n",
       "        [array(['G.1'], dtype='<U3')],\n",
       "        [array(['G.2'], dtype='<U3')],\n",
       "        [array(['G.3'], dtype='<U3')],\n",
       "        [array(['G.4'], dtype='<U3')],\n",
       "        [array(['G.m'], dtype='<U3')],\n",
       "        [array(['H.0'], dtype='<U3')],\n",
       "        [array(['H.1'], dtype='<U3')],\n",
       "        [array(['H.2'], dtype='<U3')],\n",
       "        [array(['H.3'], dtype='<U3')],\n",
       "        [array(['H.4'], dtype='<U3')],\n",
       "        [array(['H.5'], dtype='<U3')],\n",
       "        [array(['H.m'], dtype='<U3')],\n",
       "        [array(['I.1'], dtype='<U3')],\n",
       "        [array(['I.2'], dtype='<U3')],\n",
       "        [array(['I.3'], dtype='<U3')],\n",
       "        [array(['I.4'], dtype='<U3')],\n",
       "        [array(['I.5'], dtype='<U3')],\n",
       "        [array(['I.6'], dtype='<U3')],\n",
       "        [array(['I.7'], dtype='<U3')],\n",
       "        [array(['I.m'], dtype='<U3')],\n",
       "        [array(['J.0'], dtype='<U3')],\n",
       "        [array(['J.1'], dtype='<U3')],\n",
       "        [array(['J.2'], dtype='<U3')],\n",
       "        [array(['J.3'], dtype='<U3')],\n",
       "        [array(['J.4'], dtype='<U3')],\n",
       "        [array(['J.5'], dtype='<U3')],\n",
       "        [array(['J.6'], dtype='<U3')],\n",
       "        [array(['J.7'], dtype='<U3')],\n",
       "        [array(['J.m'], dtype='<U3')],\n",
       "        [array(['K.0'], dtype='<U3')],\n",
       "        [array(['K.1'], dtype='<U3')],\n",
       "        [array(['K.2'], dtype='<U3')],\n",
       "        [array(['K.3'], dtype='<U3')],\n",
       "        [array(['K.4'], dtype='<U3')],\n",
       "        [array(['K.6'], dtype='<U3')],\n",
       "        [array(['K.m'], dtype='<U3')]], dtype=object),\n",
       " 'P': array([[array([\"'Influence and correlation in social networks  In many online social systems, social ties between users play an important role in dictating their behavior. One of the ways this can happen is through social influence, the phenomenon that the actions of a user can induce his/her friends to behave in a similar way. In systems where social influence exists, ideas, modes of behavior, or new technologies can diffuse through the network like an epidemic. Therefore, identifying and understanding social influence is of tremendous interest from both analysis and design points of view.   This is a difficult task in general, since there are factors such as homophily or unobserved confounding variables that can induce statistical correlation between the actions of friends in a social network. Distinguishing influence from these is essentially the problem of distinguishing correlation from causality, a notoriously hard statistical problem.   In this paper we study this problem systematically. We define fairly general models that replicate the aforementioned sources of social correlation. We then propose two simple tests that can identify influence as a source of social correlation when the time series of user actions is available.   We give a theoretical justification of one of the tests by proving that with high probability it succeeds in ruling out influence in a rather general model of social correlation. We also simulate our tests on a number of examples designed by randomly generating actions of nodes on a real social network (from Flickr) according to one of several models. Simulation results confirm that our test performs well on these data. Finally, we apply them to real tagging data on Flickr, exhibiting that while there is significant social correlation in tagging behavior on this system, this correlation cannot be attributed to social influence. '\"],\n",
       "       dtype='<U1876')],\n",
       "        [array([\"'Efficient semi-streaming algorithms for local triangle counting in massive graphs  In this paper we study the problem of local triangle counting in large graphs. Namely, given a large graph  G  = ( V;E ) we want to estimate as accurately as possible the number of triangles incident to every node &#965; &#8712;  V  in the graph. The problem of computing the  global  number of triangles in a graph has been considered before, but to our knowledge this is the first paper that addresses the problem of  local  triangle counting with a focus on the efficiency issues arising in massive graphs. The distribution of the local number of triangles and the related local clustering coefficient can be used in many interesting applications. For example, we show that the measures we compute can help to detect the presence of spamming activity in large-scale Web graphs, as well as to provide useful features to assess content quality in social networks.   For computing the local number of triangles we propose two approximation algorithms, which are based on the idea of min-wise independent permutations (Broder et al. 1998). Our algorithms operate in a semi-streaming fashion, using O(jV j) space in main memory and performing O(log jV j) sequential scans over the edges of the graph. The first algorithm we describe in this paper also uses O(jEj) space in external memory during computation, while the second algorithm uses only main memory. We present the theoretical analysis as well as experimental results in massive graphs demonstrating the practical efficiency of our approach. '\"],\n",
       "       dtype='<U1584')],\n",
       "        [array([\"'Structured entity identification and document categorization: two tasks with one joint model  Traditionally, research in identifying structured entities in documents has proceeded independently of document categorization research. In this paper, we observe that these two tasks have much to gain from each other. Apart from direct references to entities in a database, such as names of person entities, documents often also contain words that are correlated with discriminative entity attributes, such age-group and income-level of persons. This happens naturally in many enterprise domains such as CRM, Banking, etc. Then, entity identification, which is typically vulnerable against noise and incompleteness in direct references to entities in documents, can benefit from document categorization with respect to such attributes. In return, entity identification enables documents to be categorized according to different label-sets arising from entity attributes without requiring any supervision. In this paper, we propose a probabilistic generative model for joint entity identification and document categorization. We show how the parameters of the model can be estimated using an EM algorithm in an unsupervised fashion. Using extensive experiments over real and semi-synthetic data, we demonstrate that the two tasks can benefit immensely from each other when performed jointly using the proposed model. '\"],\n",
       "       dtype='<U1413')],\n",
       "        ...,\n",
       "        [array([\"'  Decentralized Information Flow Control (DIFC) is an approach to security that allows application writers to control how data flows between the pieces of an application and the outside world. As applied to privacy, DIFC allows untrusted software to compute with private data while trusted security code controls the release of that data. As applied to integrity, DIFC allows trusted code to protect untrusted software from unexpected malicious inputs. In either case, only bugs in the trusted code, which tends to be small and isolated, can lead to security violations.   We present  Flume , a new DIFC model that applies at the granularity of operating system processes and standard OS abstractions (e.g., pipes and file descriptors). Flume was designed for simplicity of mechanism, to ease DIFCs use in existing applications, and to allow safe interaction between conventional and DIFC-aware processes. Flume runs as a user-level reference monitor onLinux. A process confined by Flume cannot perform most system calls directly; instead, an interposition layer replaces system calls with IPCto the reference monitor, which enforces data flowpolicies and performs safe operations on the processs behalf. We ported a complex web application (MoinMoin Wiki) to Flume, changingonly 2% of the original code. Performance measurements show a 43% slowdown on read workloadsand a 34% slowdown on write workloads, which aremostly due to Flumes user-level implementation. '\"],\n",
       "       dtype='<U1465')],\n",
       "        [array([\"'  We propose SecVisor, a tiny hypervisor that ensures code integrity for commodity OS kernels. In particular, SecVisor ensures that only user-approved code can execute in kernel mode over the entire system lifetime. This protects the kernel against code injection attacks, such as kernel rootkits. SecVisor can achieve this propertyeven against an attacker who controls everything but the CPU, the memory controller, and system memory chips. Further, SecVisor can even defend against attackers with knowledge of zero-day kernel exploits.   Our goal is to make SecVisor amenable to formal verificationand manual audit, thereby making it possible to rule out known classes of vulnerabilities. To this end, SecVisor offers small code size and small external interface. We rely on memory virtualization to build SecVisor and implement two versions, one using software memory virtualization and the other using CPU-supported memory virtualization. The code sizes of the runtime portions of these versions are 1739 and 1112 lines, respectively. The size of the external interface for both versions of SecVisor is 2 hypercalls. It is easy to port OS kernels to SecVisor. We port the Linux kernel version 2.6.20 by adding 12 lines and deleting 81 lines, out of a total of approximately 4.3 million lines of code in the kernel. '\"],\n",
       "       dtype='<U1321')],\n",
       "        [array([\"'  This paper describes an efficient and robust approach to provide a  safe execution environment  for an entire operating system, such as Linux, and all its applications. The approach, which we call  Secure Virtual Architecture  (SVA), defines a virtual, low-level, typed instruction set suitable for executing all code on a system, including kernel and application code. SVA code is translated for execution by a virtual machine transparently, offline or online. SVA aims to enforce  fine-grained (object level) memory safety, control-flow integrity, type safety for a subset of objects , and  sound analysis . A virtual machine implementing SVA achieves these goals by using a novel approach that exploits properties of existing memory pools in the kernel and by preserving the kernels explicit control over memory, including custom allocators and explicit deallocation. Furthermore, the safety properties can be encoded compactly as extensions to the SVA type system, allowing the (complex) safety checking compiler to be outside the trusted computing base. SVA also defines a set of OS interface operations that abstract all privileged hardware instructions, allowing the virtual machine to monitor all privileged operations and control the physical resources on a given hardware platform. We have ported the Linux kernel to SVA, treating it as a new architecture, and made only minimal code changes (less than 300 lines of code) to the machine-independent parts of the kernel and device drivers. SVA is able to prevent 4 out of 5 memory safety exploits previously reported for the Linux 2.4.22 kernel for which exploit code is available, and would prevent the fifth one simply by compiling an additional kernel library. '\"],\n",
       "       dtype='<U1727')]], dtype=object),\n",
       " 'T': array([[array(['Finally'], dtype='<U7')],\n",
       "        [array(['In'], dtype='<U2')],\n",
       "        [array(['One'], dtype='<U3')],\n",
       "        ...,\n",
       "        [array(['depth'], dtype='<U5')],\n",
       "        [array(['polynomial-time'], dtype='<U15')],\n",
       "        [array(['&#949'], dtype='<U5')]], dtype=object),\n",
       " 'V': array([[array(['Proceeding of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining'],\n",
       "       dtype='<U97')],\n",
       "        [array(['Proceeding of the 17th ACM conference on Information and knowledge management'],\n",
       "       dtype='<U77')],\n",
       "        [array(['Proceeding of the 17th international conference on World Wide Web'],\n",
       "       dtype='<U65')],\n",
       "        [array(['Proceeding of the 18th ACM conference on Information and knowledge management'],\n",
       "       dtype='<U77')],\n",
       "        [array(['Proceeding of the 33rd international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U110')],\n",
       "        [array(['Proceedings of seventh annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 10th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 10th annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U90')],\n",
       "        [array(['Proceedings of the 11th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 11th annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U90')],\n",
       "        [array(['Proceedings of the 11th international conference on World Wide Web'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining'],\n",
       "       dtype='<U98')],\n",
       "        [array(['Proceedings of the 12th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 12th annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U90')],\n",
       "        [array(['Proceedings of the 12th international conference on World Wide Web'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 13th ACM SIGKDD international conference on Knowledge discovery and data mining'],\n",
       "       dtype='<U98')],\n",
       "        [array(['Proceedings of the 13th annual ACM international conference on Mobile computing and networking'],\n",
       "       dtype='<U94')],\n",
       "        [array(['Proceedings of the 13th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 13th international World Wide Web conference on Alternate track papers &amp; posters'],\n",
       "       dtype='<U103')],\n",
       "        [array(['Proceedings of the 13th international conference on World Wide Web'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 14th ACM international conference on Information and knowledge management'],\n",
       "       dtype='<U92')],\n",
       "        [array(['Proceedings of the 14th ACM international conference on Mobile computing and networking'],\n",
       "       dtype='<U87')],\n",
       "        [array(['Proceedings of the 14th international conference on World Wide Web'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining'],\n",
       "       dtype='<U98')],\n",
       "        [array(['Proceedings of the 15th ACM international conference on Information and knowledge management'],\n",
       "       dtype='<U92')],\n",
       "        [array(['Proceedings of the 15th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 15th annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U90')],\n",
       "        [array(['Proceedings of the 15th international conference on World Wide Web'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 16th ACM SIGKDD international conference on Knowledge discovery and data mining'],\n",
       "       dtype='<U98')],\n",
       "        [array(['Proceedings of the 16th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 16th international conference on World Wide Web'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 18th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 18th international conference on World wide web'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 1986 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1987 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1988 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1989 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1990 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1992 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1993 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1994 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1995 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1996 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1997 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1998 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 1999 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 19th ACM international conference on Information and knowledge management'],\n",
       "       dtype='<U92')],\n",
       "        [array(['Proceedings of the 19th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 19th international conference on World wide web'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 1st annual international ACM SIGIR conference on Information storage and retrieval'],\n",
       "       dtype='<U101')],\n",
       "        [array(['Proceedings of the 1st annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U89')],\n",
       "        [array(['Proceedings of the 2000 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 2001 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 2001 conference on Applications, technologies, architectures, and protocols for computer communications'],\n",
       "       dtype='<U122')],\n",
       "        [array(['Proceedings of the 2002 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 2002 conference on Applications, technologies, architectures, and protocols for computer communications'],\n",
       "       dtype='<U122')],\n",
       "        [array(['Proceedings of the 2003 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 2003 conference on Applications, technologies, architectures, and protocols for computer communications'],\n",
       "       dtype='<U122')],\n",
       "        [array(['Proceedings of the 2004 conference on Applications, technologies, architectures, and protocols for computer communications'],\n",
       "       dtype='<U122')],\n",
       "        [array(['Proceedings of the 2005 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 2005 conference on Applications, technologies, architectures, and protocols for computer communications'],\n",
       "       dtype='<U122')],\n",
       "        [array(['Proceedings of the 2006 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 2006 conference on Applications, technologies, architectures, and protocols for computer communications'],\n",
       "       dtype='<U122')],\n",
       "        [array(['Proceedings of the 2007 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 2007 conference on Applications, technologies, architectures, and protocols for computer communications'],\n",
       "       dtype='<U122')],\n",
       "        [array(['Proceedings of the 2008 ACM SIGMOD international conference on Management of data'],\n",
       "       dtype='<U81')],\n",
       "        [array(['Proceedings of the 2010 international conference on Management of data'],\n",
       "       dtype='<U70')],\n",
       "        [array(['Proceedings of the 20th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 20th international conference companion on World wide web'],\n",
       "       dtype='<U76')],\n",
       "        [array(['Proceedings of the 20th international conference on World wide web'],\n",
       "       dtype='<U66')],\n",
       "        [array(['Proceedings of the 21st annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 22nd ACM symposium on Parallelism in algorithms and architectures'],\n",
       "       dtype='<U84')],\n",
       "        [array(['Proceedings of the 23rd annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 23rd international conference on Machine learning'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the 24th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 25th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 25th international conference on Machine learning'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the 26th Annual International Conference on Machine Learning'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the 26th annual international ACM SIGIR conference on Research and development in informaion retrieval'],\n",
       "       dtype='<U117')],\n",
       "        [array(['Proceedings of the 27th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 28th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 28th international conference on Very Large Data Bases'],\n",
       "       dtype='<U73')],\n",
       "        [array(['Proceedings of the 29th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 29th international conference on Very large data bases - Volume 29'],\n",
       "       dtype='<U85')],\n",
       "        [array(['Proceedings of the 30th annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 31st annual international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U118')],\n",
       "        [array(['Proceedings of the 31st international conference on Very large data bases'],\n",
       "       dtype='<U73')],\n",
       "        [array(['Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval'],\n",
       "       dtype='<U111')],\n",
       "        [array(['Proceedings of the 32nd international conference on Very large data bases'],\n",
       "       dtype='<U73')],\n",
       "        [array(['Proceedings of the 35th SIGMOD international conference on Management of data'],\n",
       "       dtype='<U77')],\n",
       "        [array(['Proceedings of the 40th annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U67')],\n",
       "        [array(['Proceedings of the 41st annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U67')],\n",
       "        [array(['Proceedings of the 42nd ACM symposium on Theory of computing'],\n",
       "       dtype='<U60')],\n",
       "        [array(['Proceedings of the 6th annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U89')],\n",
       "        [array(['Proceedings of the 7th annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U89')],\n",
       "        [array(['Proceedings of the 8th annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U89')],\n",
       "        [array(['Proceedings of the 9th annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U89')],\n",
       "        [array([\"Proceedings of the ACM SIGCOMM '97 conference on Applications, technologies, architectures, and protocols for computer communication\"],\n",
       "       dtype='<U132')],\n",
       "        [array([\"Proceedings of the ACM SIGCOMM '98 conference on Applications, technologies, architectures, and protocols for computer communication\"],\n",
       "       dtype='<U132')],\n",
       "        [array(['Proceedings of the ACM SIGCOMM 2008 conference on Data communication'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the ACM SIGCOMM 2009 conference on Data communication'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the ACM SIGCOMM 2010 conference on SIGCOMM'],\n",
       "       dtype='<U57')],\n",
       "        [array(['Proceedings of the ACM SIGCOMM conference on Communications architectures &amp; protocols'],\n",
       "       dtype='<U89')],\n",
       "        [array(['Proceedings of the ACM SIGCOMM symposium on Communications architectures and protocols_ tutorials &amp; symposium'],\n",
       "       dtype='<U113')],\n",
       "        [array(['Proceedings of the ACM SIGOPS 22nd symposium on Operating systems principles'],\n",
       "       dtype='<U76')],\n",
       "        [array(['Proceedings of the ACM symposium on Communications architectures &amp; protocols'],\n",
       "       dtype='<U80')],\n",
       "        [array(['Proceedings of the ACM workshop on Frontiers in computer communications technology'],\n",
       "       dtype='<U82')],\n",
       "        [array(['Proceedings of the Thirtieth international conference on Very large data bases - Volume 30'],\n",
       "       dtype='<U90')],\n",
       "        [array(['Proceedings of the conference on Applications, technologies, architectures, and protocols for computer communication'],\n",
       "       dtype='<U116')],\n",
       "        [array(['Proceedings of the conference on Communications architectures, protocols and applications'],\n",
       "       dtype='<U89')],\n",
       "        [array(['Proceedings of the eighteenth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the eighteenth annual ACM symposium on Parallelism in algorithms and architectures'],\n",
       "       dtype='<U97')],\n",
       "        [array(['Proceedings of the eighth ACM SIGKDD international conference on Knowledge discovery and data mining'],\n",
       "       dtype='<U100')],\n",
       "        [array(['Proceedings of the eighth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U71')],\n",
       "        [array(['Proceedings of the eighth annual ACM symposium on Parallel algorithms and architectures'],\n",
       "       dtype='<U87')],\n",
       "        [array(['Proceedings of the eighth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U69')],\n",
       "        [array(['Proceedings of the eighth international conference on Information and knowledge management'],\n",
       "       dtype='<U90')],\n",
       "        [array(['Proceedings of the eighth symposium on Data communications'],\n",
       "       dtype='<U58')],\n",
       "        [array(['Proceedings of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining'],\n",
       "       dtype='<U101')],\n",
       "        [array(['Proceedings of the eleventh ACM Symposium on Operating systems principles'],\n",
       "       dtype='<U73')],\n",
       "        [array(['Proceedings of the eleventh annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U71')],\n",
       "        [array(['Proceedings of the eleventh international conference on Information and knowledge management'],\n",
       "       dtype='<U92')],\n",
       "        [array(['Proceedings of the fifteenth annual ACM symposium on Parallel algorithms and architectures'],\n",
       "       dtype='<U90')],\n",
       "        [array(['Proceedings of the fifteenth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U72')],\n",
       "        [array(['Proceedings of the fifteenth annual ACM-SIAM symposium on Discrete algorithms'],\n",
       "       dtype='<U77')],\n",
       "        [array(['Proceedings of the fifth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U70')],\n",
       "        [array(['Proceedings of the fifth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the fifth annual workshop on Computational learning theory'],\n",
       "       dtype='<U73')],\n",
       "        [array(['Proceedings of the fifth symposium on Data communications'],\n",
       "       dtype='<U57')],\n",
       "        [array(['Proceedings of the first ACM symposium on Operating System Principles'],\n",
       "       dtype='<U69')],\n",
       "        [array(['Proceedings of the first annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the fourteenth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the fourteenth annual ACM symposium on Parallel algorithms and architectures'],\n",
       "       dtype='<U91')],\n",
       "        [array(['Proceedings of the fourteenth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U73')],\n",
       "        [array(['Proceedings of the fourteenth annual ACM-SIAM symposium on Discrete algorithms'],\n",
       "       dtype='<U78')],\n",
       "        [array(['Proceedings of the fourth ACM symposium on Operating system principles'],\n",
       "       dtype='<U70')],\n",
       "        [array(['Proceedings of the fourth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U69')],\n",
       "        [array(['Proceedings of the nineteenth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the nineteenth annual ACM symposium on Parallel algorithms and architectures'],\n",
       "       dtype='<U91')],\n",
       "        [array(['Proceedings of the nineteenth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U73')],\n",
       "        [array(['Proceedings of the nineteenth annual ACM-SIAM symposium on Discrete algorithms'],\n",
       "       dtype='<U78')],\n",
       "        [array(['Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining'],\n",
       "       dtype='<U99')],\n",
       "        [array(['Proceedings of the ninth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U70')],\n",
       "        [array(['Proceedings of the ninth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the ninth symposium on Data communications'],\n",
       "       dtype='<U57')],\n",
       "        [array(['Proceedings of the second annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U69')],\n",
       "        [array(['Proceedings of the seventeenth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U76')],\n",
       "        [array(['Proceedings of the seventeenth annual ACM symposium on Parallelism in algorithms and architectures'],\n",
       "       dtype='<U98')],\n",
       "        [array(['Proceedings of the seventeenth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U74')],\n",
       "        [array(['Proceedings of the seventeenth annual ACM-SIAM symposium on Discrete algorithm'],\n",
       "       dtype='<U78')],\n",
       "        [array(['Proceedings of the seventh ACM SIGKDD international conference on Knowledge discovery and data mining'],\n",
       "       dtype='<U101')],\n",
       "        [array(['Proceedings of the seventh ACM symposium on Operating systems principles'],\n",
       "       dtype='<U72')],\n",
       "        [array(['Proceedings of the seventh annual conference on Computational learning theory'],\n",
       "       dtype='<U77')],\n",
       "        [array(['Proceedings of the seventh symposium on Data communications'],\n",
       "       dtype='<U59')],\n",
       "        [array(['Proceedings of the sixteenth ACM conference on Conference on information and knowledge management'],\n",
       "       dtype='<U97')],\n",
       "        [array(['Proceedings of the sixteenth annual ACM symposium on Parallelism in algorithms and architectures'],\n",
       "       dtype='<U96')],\n",
       "        [array(['Proceedings of the sixteenth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U72')],\n",
       "        [array(['Proceedings of the sixteenth annual international conference on Mobile computing and networking'],\n",
       "       dtype='<U95')],\n",
       "        [array(['Proceedings of the sixth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U70')],\n",
       "        [array(['Proceedings of the sixth annual ACM symposium on Parallel algorithms and architectures'],\n",
       "       dtype='<U86')],\n",
       "        [array(['Proceedings of the sixth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the sixth international conference on Information and knowledge management'],\n",
       "       dtype='<U89')],\n",
       "        [array(['Proceedings of the sixth symposium on Data communications'],\n",
       "       dtype='<U57')],\n",
       "        [array(['Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining'],\n",
       "       dtype='<U99')],\n",
       "        [array(['Proceedings of the tenth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the tenth international conference on Information and knowledge management'],\n",
       "       dtype='<U89')],\n",
       "        [array(['Proceedings of the third ACM symposium on Operating systems principles'],\n",
       "       dtype='<U70')],\n",
       "        [array(['Proceedings of the third annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U68')],\n",
       "        [array(['Proceedings of the third annual ACM-SIAM symposium on Discrete algorithms'],\n",
       "       dtype='<U73')],\n",
       "        [array(['Proceedings of the third international conference on Information and knowledge management'],\n",
       "       dtype='<U89')],\n",
       "        [array(['Proceedings of the thirteenth ACM international conference on Information and knowledge management'],\n",
       "       dtype='<U98')],\n",
       "        [array(['Proceedings of the thirteenth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the thirteenth annual ACM symposium on Parallel algorithms and architectures'],\n",
       "       dtype='<U91')],\n",
       "        [array(['Proceedings of the thirteenth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U73')],\n",
       "        [array(['Proceedings of the thirteenth annual ACM-SIAM symposium on Discrete algorithms'],\n",
       "       dtype='<U78')],\n",
       "        [array(['Proceedings of the thirty-eighth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U76')],\n",
       "        [array(['Proceedings of the thirty-fifth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the thirty-ninth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the thirty-seventh annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U77')],\n",
       "        [array(['Proceedings of the thirty-sixth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the thirty-third annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the thiry-fourth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the twelfth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U72')],\n",
       "        [array(['Proceedings of the twelfth annual ACM symposium on Parallel algorithms and architectures'],\n",
       "       dtype='<U88')],\n",
       "        [array(['Proceedings of the twelfth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U70')],\n",
       "        [array(['Proceedings of the twelfth annual ACM-SIAM symposium on Discrete algorithms'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the twelfth international conference on Information and knowledge management'],\n",
       "       dtype='<U91')],\n",
       "        [array(['Proceedings of the twentieth ACM symposium on Operating systems principles'],\n",
       "       dtype='<U74')],\n",
       "        [array(['Proceedings of the twentieth Annual ACM-SIAM Symposium on Discrete Algorithms'],\n",
       "       dtype='<U77')],\n",
       "        [array(['Proceedings of the twentieth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U72')],\n",
       "        [array(['Proceedings of the twentieth annual symposium on Parallelism in algorithms and architectures'],\n",
       "       dtype='<U92')],\n",
       "        [array(['Proceedings of the twenty-first annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U75')],\n",
       "        [array(['Proceedings of the twenty-first annual symposium on Parallelism in algorithms and architectures'],\n",
       "       dtype='<U95')],\n",
       "        [array(['Proceedings of the twenty-first international conference on Machine learning'],\n",
       "       dtype='<U76')],\n",
       "        [array(['Proceedings of the twenty-fourth annual ACM symposium on Theory of computing'],\n",
       "       dtype='<U76')],\n",
       "        [array(['Proceedings of twenty-first ACM SIGOPS symposium on Operating systems principles'],\n",
       "       dtype='<U80')]], dtype=object),\n",
       " 'PvsT': <12499x1903 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 972973 stored elements in Compressed Sparse Column format>,\n",
       " 'CNormPvsA': <12499x17431 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 37055 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormPvsA': <12499x17431 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 37055 stored elements in Compressed Sparse Column format>,\n",
       " 'CNormPvsC': <12499x14 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 12499 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormPvsC': <12499x14 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 12499 stored elements in Compressed Sparse Column format>,\n",
       " 'CNormPvsT': <12499x1903 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 972973 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormPvsT': <12499x1903 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 972973 stored elements in Compressed Sparse Column format>,\n",
       " 'CNormPvsV': <12499x196 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 12499 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormPvsV': <12499x196 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 12499 stored elements in Compressed Sparse Column format>,\n",
       " 'CNormVvsC': <196x14 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 196 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormVvsC': <196x14 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 196 stored elements in Compressed Sparse Column format>,\n",
       " 'CNormAvsF': <17431x1804 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 30424 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormAvsF': <17431x1804 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 30424 stored elements in Compressed Sparse Column format>,\n",
       " 'CNormPvsL': <12499x73 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 12499 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormPvsL': <12499x73 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 12499 stored elements in Compressed Sparse Column format>,\n",
       " 'stopwords': array([[array(['a'], dtype='<U1')],\n",
       "        [array(['able'], dtype='<U4')],\n",
       "        [array(['about'], dtype='<U5')],\n",
       "        [array(['above'], dtype='<U5')],\n",
       "        [array(['abst'], dtype='<U4')],\n",
       "        [array(['accordance'], dtype='<U10')],\n",
       "        [array(['according'], dtype='<U9')],\n",
       "        [array(['accordingly'], dtype='<U11')],\n",
       "        [array(['across'], dtype='<U6')],\n",
       "        [array(['act'], dtype='<U3')],\n",
       "        [array(['actually'], dtype='<U8')],\n",
       "        [array(['added'], dtype='<U5')],\n",
       "        [array(['adj'], dtype='<U3')],\n",
       "        [array(['adopted'], dtype='<U7')],\n",
       "        [array(['affected'], dtype='<U8')],\n",
       "        [array(['affecting'], dtype='<U9')],\n",
       "        [array(['affects'], dtype='<U7')],\n",
       "        [array(['after'], dtype='<U5')],\n",
       "        [array(['afterwards'], dtype='<U10')],\n",
       "        [array(['again'], dtype='<U5')],\n",
       "        [array(['against'], dtype='<U7')],\n",
       "        [array(['ah'], dtype='<U2')],\n",
       "        [array(['all'], dtype='<U3')],\n",
       "        [array(['almost'], dtype='<U6')],\n",
       "        [array(['alone'], dtype='<U5')],\n",
       "        [array(['along'], dtype='<U5')],\n",
       "        [array(['already'], dtype='<U7')],\n",
       "        [array(['also'], dtype='<U4')],\n",
       "        [array(['although'], dtype='<U8')],\n",
       "        [array(['always'], dtype='<U6')],\n",
       "        [array(['am'], dtype='<U2')],\n",
       "        [array(['among'], dtype='<U5')],\n",
       "        [array(['amongst'], dtype='<U7')],\n",
       "        [array(['an'], dtype='<U2')],\n",
       "        [array(['and'], dtype='<U3')],\n",
       "        [array(['announce'], dtype='<U8')],\n",
       "        [array(['another'], dtype='<U7')],\n",
       "        [array(['any'], dtype='<U3')],\n",
       "        [array(['anybody'], dtype='<U7')],\n",
       "        [array(['anyhow'], dtype='<U6')],\n",
       "        [array(['anymore'], dtype='<U7')],\n",
       "        [array(['anyone'], dtype='<U6')],\n",
       "        [array(['anything'], dtype='<U8')],\n",
       "        [array(['anyway'], dtype='<U6')],\n",
       "        [array(['anyways'], dtype='<U7')],\n",
       "        [array(['anywhere'], dtype='<U8')],\n",
       "        [array(['apparently'], dtype='<U10')],\n",
       "        [array(['approximately'], dtype='<U13')],\n",
       "        [array(['are'], dtype='<U3')],\n",
       "        [array(['aren'], dtype='<U4')],\n",
       "        [array(['arent'], dtype='<U5')],\n",
       "        [array(['arise'], dtype='<U5')],\n",
       "        [array(['around'], dtype='<U6')],\n",
       "        [array(['as'], dtype='<U2')],\n",
       "        [array(['aside'], dtype='<U5')],\n",
       "        [array(['ask'], dtype='<U3')],\n",
       "        [array(['asking'], dtype='<U6')],\n",
       "        [array(['at'], dtype='<U2')],\n",
       "        [array(['auth'], dtype='<U4')],\n",
       "        [array(['available'], dtype='<U9')],\n",
       "        [array(['away'], dtype='<U4')],\n",
       "        [array(['awfully'], dtype='<U7')],\n",
       "        [array(['b'], dtype='<U1')],\n",
       "        [array(['back'], dtype='<U4')],\n",
       "        [array(['be'], dtype='<U2')],\n",
       "        [array(['became'], dtype='<U6')],\n",
       "        [array(['because'], dtype='<U7')],\n",
       "        [array(['become'], dtype='<U6')],\n",
       "        [array(['becomes'], dtype='<U7')],\n",
       "        [array(['becoming'], dtype='<U8')],\n",
       "        [array(['been'], dtype='<U4')],\n",
       "        [array(['before'], dtype='<U6')],\n",
       "        [array(['beforehand'], dtype='<U10')],\n",
       "        [array(['begin'], dtype='<U5')],\n",
       "        [array(['beginning'], dtype='<U9')],\n",
       "        [array(['beginnings'], dtype='<U10')],\n",
       "        [array(['begins'], dtype='<U6')],\n",
       "        [array(['behind'], dtype='<U6')],\n",
       "        [array(['being'], dtype='<U5')],\n",
       "        [array(['believe'], dtype='<U7')],\n",
       "        [array(['below'], dtype='<U5')],\n",
       "        [array(['beside'], dtype='<U6')],\n",
       "        [array(['besides'], dtype='<U7')],\n",
       "        [array(['between'], dtype='<U7')],\n",
       "        [array(['beyond'], dtype='<U6')],\n",
       "        [array(['biol'], dtype='<U4')],\n",
       "        [array(['both'], dtype='<U4')],\n",
       "        [array(['brief'], dtype='<U5')],\n",
       "        [array(['briefly'], dtype='<U7')],\n",
       "        [array(['but'], dtype='<U3')],\n",
       "        [array(['by'], dtype='<U2')],\n",
       "        [array(['c'], dtype='<U1')],\n",
       "        [array(['ca'], dtype='<U2')],\n",
       "        [array(['came'], dtype='<U4')],\n",
       "        [array(['can'], dtype='<U3')],\n",
       "        [array(['cannot'], dtype='<U6')],\n",
       "        [array([\"can't\"], dtype='<U5')],\n",
       "        [array(['cause'], dtype='<U5')],\n",
       "        [array(['causes'], dtype='<U6')],\n",
       "        [array(['certain'], dtype='<U7')],\n",
       "        [array(['certainly'], dtype='<U9')],\n",
       "        [array(['co'], dtype='<U2')],\n",
       "        [array(['com'], dtype='<U3')],\n",
       "        [array(['come'], dtype='<U4')],\n",
       "        [array(['comes'], dtype='<U5')],\n",
       "        [array(['contain'], dtype='<U7')],\n",
       "        [array(['containing'], dtype='<U10')],\n",
       "        [array(['contains'], dtype='<U8')],\n",
       "        [array(['could'], dtype='<U5')],\n",
       "        [array(['couldnt'], dtype='<U7')],\n",
       "        [array(['d'], dtype='<U1')],\n",
       "        [array(['date'], dtype='<U4')],\n",
       "        [array(['did'], dtype='<U3')],\n",
       "        [array([\"didn't\"], dtype='<U6')],\n",
       "        [array(['different'], dtype='<U9')],\n",
       "        [array(['do'], dtype='<U2')],\n",
       "        [array(['does'], dtype='<U4')],\n",
       "        [array([\"doesn't\"], dtype='<U7')],\n",
       "        [array(['doing'], dtype='<U5')],\n",
       "        [array(['done'], dtype='<U4')],\n",
       "        [array([\"don't\"], dtype='<U5')],\n",
       "        [array(['down'], dtype='<U4')],\n",
       "        [array(['downwards'], dtype='<U9')],\n",
       "        [array(['due'], dtype='<U3')],\n",
       "        [array(['during'], dtype='<U6')],\n",
       "        [array(['e'], dtype='<U1')],\n",
       "        [array(['each'], dtype='<U4')],\n",
       "        [array(['ed'], dtype='<U2')],\n",
       "        [array(['edu'], dtype='<U3')],\n",
       "        [array(['effect'], dtype='<U6')],\n",
       "        [array(['eg'], dtype='<U2')],\n",
       "        [array(['eight'], dtype='<U5')],\n",
       "        [array(['eighty'], dtype='<U6')],\n",
       "        [array(['either'], dtype='<U6')],\n",
       "        [array(['else'], dtype='<U4')],\n",
       "        [array(['elsewhere'], dtype='<U9')],\n",
       "        [array(['end'], dtype='<U3')],\n",
       "        [array(['ending'], dtype='<U6')],\n",
       "        [array(['enough'], dtype='<U6')],\n",
       "        [array(['especially'], dtype='<U10')],\n",
       "        [array(['et'], dtype='<U2')],\n",
       "        [array(['et-al'], dtype='<U5')],\n",
       "        [array(['etc'], dtype='<U3')],\n",
       "        [array(['even'], dtype='<U4')],\n",
       "        [array(['ever'], dtype='<U4')],\n",
       "        [array(['every'], dtype='<U5')],\n",
       "        [array(['everybody'], dtype='<U9')],\n",
       "        [array(['everyone'], dtype='<U8')],\n",
       "        [array(['everything'], dtype='<U10')],\n",
       "        [array(['everywhere'], dtype='<U10')],\n",
       "        [array(['ex'], dtype='<U2')],\n",
       "        [array(['except'], dtype='<U6')],\n",
       "        [array(['f'], dtype='<U1')],\n",
       "        [array(['far'], dtype='<U3')],\n",
       "        [array(['few'], dtype='<U3')],\n",
       "        [array(['ff'], dtype='<U2')],\n",
       "        [array(['fifth'], dtype='<U5')],\n",
       "        [array(['first'], dtype='<U5')],\n",
       "        [array(['five'], dtype='<U4')],\n",
       "        [array(['fix'], dtype='<U3')],\n",
       "        [array(['followed'], dtype='<U8')],\n",
       "        [array(['following'], dtype='<U9')],\n",
       "        [array(['follows'], dtype='<U7')],\n",
       "        [array(['for'], dtype='<U3')],\n",
       "        [array(['former'], dtype='<U6')],\n",
       "        [array(['formerly'], dtype='<U8')],\n",
       "        [array(['forth'], dtype='<U5')],\n",
       "        [array(['found'], dtype='<U5')],\n",
       "        [array(['four'], dtype='<U4')],\n",
       "        [array(['from'], dtype='<U4')],\n",
       "        [array(['further'], dtype='<U7')],\n",
       "        [array(['furthermore'], dtype='<U11')],\n",
       "        [array(['g'], dtype='<U1')],\n",
       "        [array(['gave'], dtype='<U4')],\n",
       "        [array(['get'], dtype='<U3')],\n",
       "        [array(['gets'], dtype='<U4')],\n",
       "        [array(['getting'], dtype='<U7')],\n",
       "        [array(['give'], dtype='<U4')],\n",
       "        [array(['given'], dtype='<U5')],\n",
       "        [array(['gives'], dtype='<U5')],\n",
       "        [array(['giving'], dtype='<U6')],\n",
       "        [array(['go'], dtype='<U2')],\n",
       "        [array(['goes'], dtype='<U4')],\n",
       "        [array(['gone'], dtype='<U4')],\n",
       "        [array(['got'], dtype='<U3')],\n",
       "        [array(['gotten'], dtype='<U6')],\n",
       "        [array(['h'], dtype='<U1')],\n",
       "        [array(['had'], dtype='<U3')],\n",
       "        [array(['happens'], dtype='<U7')],\n",
       "        [array(['hardly'], dtype='<U6')],\n",
       "        [array(['has'], dtype='<U3')],\n",
       "        [array([\"hasn't\"], dtype='<U6')],\n",
       "        [array(['have'], dtype='<U4')],\n",
       "        [array([\"haven't\"], dtype='<U7')],\n",
       "        [array(['having'], dtype='<U6')],\n",
       "        [array(['he'], dtype='<U2')],\n",
       "        [array(['hed'], dtype='<U3')],\n",
       "        [array(['hence'], dtype='<U5')],\n",
       "        [array(['her'], dtype='<U3')],\n",
       "        [array(['here'], dtype='<U4')],\n",
       "        [array(['hereafter'], dtype='<U9')],\n",
       "        [array(['hereby'], dtype='<U6')],\n",
       "        [array(['herein'], dtype='<U6')],\n",
       "        [array(['heres'], dtype='<U5')],\n",
       "        [array(['hereupon'], dtype='<U8')],\n",
       "        [array(['hers'], dtype='<U4')],\n",
       "        [array(['herself'], dtype='<U7')],\n",
       "        [array(['hes'], dtype='<U3')],\n",
       "        [array(['hi'], dtype='<U2')],\n",
       "        [array(['hid'], dtype='<U3')],\n",
       "        [array(['him'], dtype='<U3')],\n",
       "        [array(['himself'], dtype='<U7')],\n",
       "        [array(['his'], dtype='<U3')],\n",
       "        [array(['hither'], dtype='<U6')],\n",
       "        [array(['home'], dtype='<U4')],\n",
       "        [array(['how'], dtype='<U3')],\n",
       "        [array(['howbeit'], dtype='<U7')],\n",
       "        [array(['however'], dtype='<U7')],\n",
       "        [array(['hundred'], dtype='<U7')],\n",
       "        [array(['i'], dtype='<U1')],\n",
       "        [array(['id'], dtype='<U2')],\n",
       "        [array(['ie'], dtype='<U2')],\n",
       "        [array(['if'], dtype='<U2')],\n",
       "        [array([\"i'll\"], dtype='<U4')],\n",
       "        [array(['im'], dtype='<U2')],\n",
       "        [array(['immediate'], dtype='<U9')],\n",
       "        [array(['immediately'], dtype='<U11')],\n",
       "        [array(['importance'], dtype='<U10')],\n",
       "        [array(['important'], dtype='<U9')],\n",
       "        [array(['in'], dtype='<U2')],\n",
       "        [array(['inc'], dtype='<U3')],\n",
       "        [array(['indeed'], dtype='<U6')],\n",
       "        [array(['index'], dtype='<U5')],\n",
       "        [array(['information'], dtype='<U11')],\n",
       "        [array(['instead'], dtype='<U7')],\n",
       "        [array(['into'], dtype='<U4')],\n",
       "        [array(['invention'], dtype='<U9')],\n",
       "        [array(['inward'], dtype='<U6')],\n",
       "        [array(['is'], dtype='<U2')],\n",
       "        [array([\"isn't\"], dtype='<U5')],\n",
       "        [array(['it'], dtype='<U2')],\n",
       "        [array(['itd'], dtype='<U3')],\n",
       "        [array([\"it'll\"], dtype='<U5')],\n",
       "        [array(['its'], dtype='<U3')],\n",
       "        [array(['itself'], dtype='<U6')],\n",
       "        [array([\"i've\"], dtype='<U4')],\n",
       "        [array(['j'], dtype='<U1')],\n",
       "        [array(['just'], dtype='<U4')],\n",
       "        [array(['k'], dtype='<U1')],\n",
       "        [array(['keep'], dtype='<U4')],\n",
       "        [array(['keeps'], dtype='<U5')],\n",
       "        [array(['kept'], dtype='<U4')],\n",
       "        [array(['keys'], dtype='<U4')],\n",
       "        [array(['kg'], dtype='<U2')],\n",
       "        [array(['km'], dtype='<U2')],\n",
       "        [array(['know'], dtype='<U4')],\n",
       "        [array(['known'], dtype='<U5')],\n",
       "        [array(['knows'], dtype='<U5')],\n",
       "        [array(['l'], dtype='<U1')],\n",
       "        [array(['largely'], dtype='<U7')],\n",
       "        [array(['last'], dtype='<U4')],\n",
       "        [array(['lately'], dtype='<U6')],\n",
       "        [array(['later'], dtype='<U5')],\n",
       "        [array(['latter'], dtype='<U6')],\n",
       "        [array(['latterly'], dtype='<U8')],\n",
       "        [array(['least'], dtype='<U5')],\n",
       "        [array(['less'], dtype='<U4')],\n",
       "        [array(['lest'], dtype='<U4')],\n",
       "        [array(['let'], dtype='<U3')],\n",
       "        [array(['lets'], dtype='<U4')],\n",
       "        [array(['like'], dtype='<U4')],\n",
       "        [array(['liked'], dtype='<U5')],\n",
       "        [array(['likely'], dtype='<U6')],\n",
       "        [array(['line'], dtype='<U4')],\n",
       "        [array(['little'], dtype='<U6')],\n",
       "        [array([\"'ll\"], dtype='<U3')],\n",
       "        [array(['look'], dtype='<U4')],\n",
       "        [array(['looking'], dtype='<U7')],\n",
       "        [array(['looks'], dtype='<U5')],\n",
       "        [array(['ltd'], dtype='<U3')],\n",
       "        [array(['m'], dtype='<U1')],\n",
       "        [array(['made'], dtype='<U4')],\n",
       "        [array(['mainly'], dtype='<U6')],\n",
       "        [array(['make'], dtype='<U4')],\n",
       "        [array(['makes'], dtype='<U5')],\n",
       "        [array(['many'], dtype='<U4')],\n",
       "        [array(['may'], dtype='<U3')],\n",
       "        [array(['maybe'], dtype='<U5')],\n",
       "        [array(['me'], dtype='<U2')],\n",
       "        [array(['mean'], dtype='<U4')],\n",
       "        [array(['means'], dtype='<U5')],\n",
       "        [array(['meantime'], dtype='<U8')],\n",
       "        [array(['meanwhile'], dtype='<U9')],\n",
       "        [array(['merely'], dtype='<U6')],\n",
       "        [array(['mg'], dtype='<U2')],\n",
       "        [array(['might'], dtype='<U5')],\n",
       "        [array(['million'], dtype='<U7')],\n",
       "        [array(['miss'], dtype='<U4')],\n",
       "        [array(['ml'], dtype='<U2')],\n",
       "        [array(['more'], dtype='<U4')],\n",
       "        [array(['moreover'], dtype='<U8')],\n",
       "        [array(['most'], dtype='<U4')],\n",
       "        [array(['mostly'], dtype='<U6')],\n",
       "        [array(['mr'], dtype='<U2')],\n",
       "        [array(['mrs'], dtype='<U3')],\n",
       "        [array(['much'], dtype='<U4')],\n",
       "        [array(['mug'], dtype='<U3')],\n",
       "        [array(['must'], dtype='<U4')],\n",
       "        [array(['my'], dtype='<U2')],\n",
       "        [array(['myself'], dtype='<U6')],\n",
       "        [array(['n'], dtype='<U1')],\n",
       "        [array(['na'], dtype='<U2')],\n",
       "        [array(['name'], dtype='<U4')],\n",
       "        [array(['namely'], dtype='<U6')],\n",
       "        [array(['nay'], dtype='<U3')],\n",
       "        [array(['nd'], dtype='<U2')],\n",
       "        [array(['near'], dtype='<U4')],\n",
       "        [array(['nearly'], dtype='<U6')],\n",
       "        [array(['necessarily'], dtype='<U11')],\n",
       "        [array(['necessary'], dtype='<U9')],\n",
       "        [array(['need'], dtype='<U4')],\n",
       "        [array(['needs'], dtype='<U5')],\n",
       "        [array(['neither'], dtype='<U7')],\n",
       "        [array(['never'], dtype='<U5')],\n",
       "        [array(['nevertheless'], dtype='<U12')],\n",
       "        [array(['new'], dtype='<U3')],\n",
       "        [array(['next'], dtype='<U4')],\n",
       "        [array(['nine'], dtype='<U4')],\n",
       "        [array(['ninety'], dtype='<U6')],\n",
       "        [array(['no'], dtype='<U2')],\n",
       "        [array(['nobody'], dtype='<U6')],\n",
       "        [array(['non'], dtype='<U3')],\n",
       "        [array(['none'], dtype='<U4')],\n",
       "        [array(['nonetheless'], dtype='<U11')],\n",
       "        [array(['noone'], dtype='<U5')],\n",
       "        [array(['nor'], dtype='<U3')],\n",
       "        [array(['normally'], dtype='<U8')],\n",
       "        [array(['nos'], dtype='<U3')],\n",
       "        [array(['not'], dtype='<U3')],\n",
       "        [array(['noted'], dtype='<U5')],\n",
       "        [array(['nothing'], dtype='<U7')],\n",
       "        [array(['now'], dtype='<U3')],\n",
       "        [array(['nowhere'], dtype='<U7')],\n",
       "        [array(['o'], dtype='<U1')],\n",
       "        [array(['obtain'], dtype='<U6')],\n",
       "        [array(['obtained'], dtype='<U8')],\n",
       "        [array(['obviously'], dtype='<U9')],\n",
       "        [array(['of'], dtype='<U2')],\n",
       "        [array(['off'], dtype='<U3')],\n",
       "        [array(['often'], dtype='<U5')],\n",
       "        [array(['oh'], dtype='<U2')],\n",
       "        [array(['ok'], dtype='<U2')],\n",
       "        [array(['okay'], dtype='<U4')],\n",
       "        [array(['old'], dtype='<U3')],\n",
       "        [array(['omitted'], dtype='<U7')],\n",
       "        [array(['on'], dtype='<U2')],\n",
       "        [array(['once'], dtype='<U4')],\n",
       "        [array(['one'], dtype='<U3')],\n",
       "        [array(['ones'], dtype='<U4')],\n",
       "        [array(['only'], dtype='<U4')],\n",
       "        [array(['onto'], dtype='<U4')],\n",
       "        [array(['or'], dtype='<U2')],\n",
       "        [array(['ord'], dtype='<U3')],\n",
       "        [array(['other'], dtype='<U5')],\n",
       "        [array(['others'], dtype='<U6')],\n",
       "        [array(['otherwise'], dtype='<U9')],\n",
       "        [array(['ought'], dtype='<U5')],\n",
       "        [array(['our'], dtype='<U3')],\n",
       "        [array(['ours'], dtype='<U4')],\n",
       "        [array(['ourselves'], dtype='<U9')],\n",
       "        [array(['out'], dtype='<U3')],\n",
       "        [array(['outside'], dtype='<U7')],\n",
       "        [array(['over'], dtype='<U4')],\n",
       "        [array(['overall'], dtype='<U7')],\n",
       "        [array(['owing'], dtype='<U5')],\n",
       "        [array(['own'], dtype='<U3')],\n",
       "        [array(['p'], dtype='<U1')],\n",
       "        [array(['page'], dtype='<U4')],\n",
       "        [array(['pages'], dtype='<U5')],\n",
       "        [array(['part'], dtype='<U4')],\n",
       "        [array(['particular'], dtype='<U10')],\n",
       "        [array(['particularly'], dtype='<U12')],\n",
       "        [array(['past'], dtype='<U4')],\n",
       "        [array(['per'], dtype='<U3')],\n",
       "        [array(['perhaps'], dtype='<U7')],\n",
       "        [array(['placed'], dtype='<U6')],\n",
       "        [array(['please'], dtype='<U6')],\n",
       "        [array(['plus'], dtype='<U4')],\n",
       "        [array(['poorly'], dtype='<U6')],\n",
       "        [array(['possible'], dtype='<U8')],\n",
       "        [array(['possibly'], dtype='<U8')],\n",
       "        [array(['potentially'], dtype='<U11')],\n",
       "        [array(['pp'], dtype='<U2')],\n",
       "        [array(['predominantly'], dtype='<U13')],\n",
       "        [array(['present'], dtype='<U7')],\n",
       "        [array(['previously'], dtype='<U10')],\n",
       "        [array(['primarily'], dtype='<U9')],\n",
       "        [array(['probably'], dtype='<U8')],\n",
       "        [array(['promptly'], dtype='<U8')],\n",
       "        [array(['proud'], dtype='<U5')],\n",
       "        [array(['provides'], dtype='<U8')],\n",
       "        [array(['put'], dtype='<U3')],\n",
       "        [array(['q'], dtype='<U1')],\n",
       "        [array(['que'], dtype='<U3')],\n",
       "        [array(['quickly'], dtype='<U7')],\n",
       "        [array(['quite'], dtype='<U5')],\n",
       "        [array(['qv'], dtype='<U2')],\n",
       "        [array(['r'], dtype='<U1')],\n",
       "        [array(['ran'], dtype='<U3')],\n",
       "        [array(['rather'], dtype='<U6')],\n",
       "        [array(['rd'], dtype='<U2')],\n",
       "        [array(['re'], dtype='<U2')],\n",
       "        [array(['readily'], dtype='<U7')],\n",
       "        [array(['really'], dtype='<U6')],\n",
       "        [array(['recent'], dtype='<U6')],\n",
       "        [array(['recently'], dtype='<U8')],\n",
       "        [array(['ref'], dtype='<U3')],\n",
       "        [array(['refs'], dtype='<U4')],\n",
       "        [array(['regarding'], dtype='<U9')],\n",
       "        [array(['regardless'], dtype='<U10')],\n",
       "        [array(['regards'], dtype='<U7')],\n",
       "        [array(['related'], dtype='<U7')],\n",
       "        [array(['relatively'], dtype='<U10')],\n",
       "        [array(['research'], dtype='<U8')],\n",
       "        [array(['respectively'], dtype='<U12')],\n",
       "        [array(['resulted'], dtype='<U8')],\n",
       "        [array(['resulting'], dtype='<U9')],\n",
       "        [array(['results'], dtype='<U7')],\n",
       "        [array(['right'], dtype='<U5')],\n",
       "        [array(['run'], dtype='<U3')],\n",
       "        [array(['s'], dtype='<U1')],\n",
       "        [array(['said'], dtype='<U4')],\n",
       "        [array(['same'], dtype='<U4')],\n",
       "        [array(['saw'], dtype='<U3')],\n",
       "        [array(['say'], dtype='<U3')],\n",
       "        [array(['saying'], dtype='<U6')],\n",
       "        [array(['says'], dtype='<U4')],\n",
       "        [array(['sec'], dtype='<U3')],\n",
       "        [array(['section'], dtype='<U7')],\n",
       "        [array(['see'], dtype='<U3')],\n",
       "        [array(['seeing'], dtype='<U6')],\n",
       "        [array(['seem'], dtype='<U4')],\n",
       "        [array(['seemed'], dtype='<U6')],\n",
       "        [array(['seeming'], dtype='<U7')],\n",
       "        [array(['seems'], dtype='<U5')],\n",
       "        [array(['seen'], dtype='<U4')],\n",
       "        [array(['self'], dtype='<U4')],\n",
       "        [array(['selves'], dtype='<U6')],\n",
       "        [array(['sent'], dtype='<U4')],\n",
       "        [array(['seven'], dtype='<U5')],\n",
       "        [array(['several'], dtype='<U7')],\n",
       "        [array(['shall'], dtype='<U5')],\n",
       "        [array(['she'], dtype='<U3')],\n",
       "        [array(['shed'], dtype='<U4')],\n",
       "        [array([\"she'll\"], dtype='<U6')],\n",
       "        [array(['shes'], dtype='<U4')],\n",
       "        [array(['should'], dtype='<U6')],\n",
       "        [array([\"shouldn't\"], dtype='<U9')],\n",
       "        [array(['show'], dtype='<U4')],\n",
       "        [array(['showed'], dtype='<U6')],\n",
       "        [array(['shown'], dtype='<U5')],\n",
       "        [array(['showns'], dtype='<U6')],\n",
       "        [array(['shows'], dtype='<U5')],\n",
       "        [array(['significant'], dtype='<U11')],\n",
       "        [array(['significantly'], dtype='<U13')],\n",
       "        [array(['similar'], dtype='<U7')],\n",
       "        [array(['similarly'], dtype='<U9')],\n",
       "        [array(['since'], dtype='<U5')],\n",
       "        [array(['six'], dtype='<U3')],\n",
       "        [array(['slightly'], dtype='<U8')],\n",
       "        [array(['so'], dtype='<U2')],\n",
       "        [array(['some'], dtype='<U4')],\n",
       "        [array(['somebody'], dtype='<U8')],\n",
       "        [array(['somehow'], dtype='<U7')],\n",
       "        [array(['someone'], dtype='<U7')],\n",
       "        [array(['somethan'], dtype='<U8')],\n",
       "        [array(['something'], dtype='<U9')],\n",
       "        [array(['sometime'], dtype='<U8')],\n",
       "        [array(['sometimes'], dtype='<U9')],\n",
       "        [array(['somewhat'], dtype='<U8')],\n",
       "        [array(['somewhere'], dtype='<U9')],\n",
       "        [array(['soon'], dtype='<U4')],\n",
       "        [array(['sorry'], dtype='<U5')],\n",
       "        [array(['specifically'], dtype='<U12')],\n",
       "        [array(['specified'], dtype='<U9')],\n",
       "        [array(['specify'], dtype='<U7')],\n",
       "        [array(['specifying'], dtype='<U10')],\n",
       "        [array(['state'], dtype='<U5')],\n",
       "        [array(['states'], dtype='<U6')],\n",
       "        [array(['still'], dtype='<U5')],\n",
       "        [array(['stop'], dtype='<U4')],\n",
       "        [array(['strongly'], dtype='<U8')],\n",
       "        [array(['sub'], dtype='<U3')],\n",
       "        [array(['substantially'], dtype='<U13')],\n",
       "        [array(['successfully'], dtype='<U12')],\n",
       "        [array(['such'], dtype='<U4')],\n",
       "        [array(['sufficiently'], dtype='<U12')],\n",
       "        [array(['suggest'], dtype='<U7')],\n",
       "        [array(['sup'], dtype='<U3')],\n",
       "        [array(['sure'], dtype='<U4')],\n",
       "        [array(['t'], dtype='<U1')],\n",
       "        [array(['take'], dtype='<U4')],\n",
       "        [array(['taken'], dtype='<U5')],\n",
       "        [array(['taking'], dtype='<U6')],\n",
       "        [array(['tell'], dtype='<U4')],\n",
       "        [array(['tends'], dtype='<U5')],\n",
       "        [array(['th'], dtype='<U2')],\n",
       "        [array(['than'], dtype='<U4')],\n",
       "        [array(['thank'], dtype='<U5')],\n",
       "        [array(['thanks'], dtype='<U6')],\n",
       "        [array(['thanx'], dtype='<U5')],\n",
       "        [array(['that'], dtype='<U4')],\n",
       "        [array([\"that'll\"], dtype='<U7')],\n",
       "        [array(['thats'], dtype='<U5')],\n",
       "        [array([\"that've\"], dtype='<U7')],\n",
       "        [array(['the'], dtype='<U3')],\n",
       "        [array(['their'], dtype='<U5')],\n",
       "        [array(['theirs'], dtype='<U6')],\n",
       "        [array(['them'], dtype='<U4')],\n",
       "        [array(['themselves'], dtype='<U10')],\n",
       "        [array(['then'], dtype='<U4')],\n",
       "        [array(['thence'], dtype='<U6')],\n",
       "        [array(['there'], dtype='<U5')],\n",
       "        [array(['thereafter'], dtype='<U10')],\n",
       "        [array(['thereby'], dtype='<U7')],\n",
       "        [array(['thered'], dtype='<U6')],\n",
       "        [array(['therefore'], dtype='<U9')],\n",
       "        [array(['therein'], dtype='<U7')],\n",
       "        [array([\"there'll\"], dtype='<U8')],\n",
       "        [array(['thereof'], dtype='<U7')],\n",
       "        [array(['therere'], dtype='<U7')],\n",
       "        [array(['theres'], dtype='<U6')],\n",
       "        [array(['thereto'], dtype='<U7')],\n",
       "        [array(['thereupon'], dtype='<U9')],\n",
       "        [array([\"there've\"], dtype='<U8')],\n",
       "        [array(['these'], dtype='<U5')],\n",
       "        [array(['they'], dtype='<U4')],\n",
       "        [array(['theyd'], dtype='<U5')],\n",
       "        [array([\"they'll\"], dtype='<U7')],\n",
       "        [array(['theyre'], dtype='<U6')],\n",
       "        [array([\"they've\"], dtype='<U7')],\n",
       "        [array(['think'], dtype='<U5')],\n",
       "        [array(['this'], dtype='<U4')],\n",
       "        [array(['those'], dtype='<U5')],\n",
       "        [array(['thou'], dtype='<U4')],\n",
       "        [array(['though'], dtype='<U6')],\n",
       "        [array(['thoughh'], dtype='<U7')],\n",
       "        [array(['thousand'], dtype='<U8')],\n",
       "        [array(['throug'], dtype='<U6')],\n",
       "        [array(['through'], dtype='<U7')],\n",
       "        [array(['throughout'], dtype='<U10')],\n",
       "        [array(['thru'], dtype='<U4')],\n",
       "        [array(['thus'], dtype='<U4')],\n",
       "        [array(['til'], dtype='<U3')],\n",
       "        [array(['tip'], dtype='<U3')],\n",
       "        [array(['to'], dtype='<U2')],\n",
       "        [array(['together'], dtype='<U8')],\n",
       "        [array(['too'], dtype='<U3')],\n",
       "        [array(['took'], dtype='<U4')],\n",
       "        [array(['toward'], dtype='<U6')],\n",
       "        [array(['towards'], dtype='<U7')],\n",
       "        [array(['tried'], dtype='<U5')],\n",
       "        [array(['tries'], dtype='<U5')],\n",
       "        [array(['truly'], dtype='<U5')],\n",
       "        [array(['try'], dtype='<U3')],\n",
       "        [array(['trying'], dtype='<U6')],\n",
       "        [array(['ts'], dtype='<U2')],\n",
       "        [array(['twice'], dtype='<U5')],\n",
       "        [array(['two'], dtype='<U3')],\n",
       "        [array(['u'], dtype='<U1')],\n",
       "        [array(['un'], dtype='<U2')],\n",
       "        [array(['under'], dtype='<U5')],\n",
       "        [array(['unfortunately'], dtype='<U13')],\n",
       "        [array(['unless'], dtype='<U6')],\n",
       "        [array(['unlike'], dtype='<U6')],\n",
       "        [array(['unlikely'], dtype='<U8')],\n",
       "        [array(['until'], dtype='<U5')],\n",
       "        [array(['unto'], dtype='<U4')],\n",
       "        [array(['up'], dtype='<U2')],\n",
       "        [array(['upon'], dtype='<U4')],\n",
       "        [array(['ups'], dtype='<U3')],\n",
       "        [array(['us'], dtype='<U2')],\n",
       "        [array(['use'], dtype='<U3')],\n",
       "        [array(['used'], dtype='<U4')],\n",
       "        [array(['useful'], dtype='<U6')],\n",
       "        [array(['usefully'], dtype='<U8')],\n",
       "        [array(['usefulness'], dtype='<U10')],\n",
       "        [array(['uses'], dtype='<U4')],\n",
       "        [array(['using'], dtype='<U5')],\n",
       "        [array(['usually'], dtype='<U7')],\n",
       "        [array(['v'], dtype='<U1')],\n",
       "        [array(['value'], dtype='<U5')],\n",
       "        [array(['various'], dtype='<U7')],\n",
       "        [array([\"'ve\"], dtype='<U3')],\n",
       "        [array(['very'], dtype='<U4')],\n",
       "        [array(['via'], dtype='<U3')],\n",
       "        [array(['viz'], dtype='<U3')],\n",
       "        [array(['vol'], dtype='<U3')],\n",
       "        [array(['vols'], dtype='<U4')],\n",
       "        [array(['vs'], dtype='<U2')],\n",
       "        [array(['w'], dtype='<U1')],\n",
       "        [array(['want'], dtype='<U4')],\n",
       "        [array(['wants'], dtype='<U5')],\n",
       "        [array(['was'], dtype='<U3')],\n",
       "        [array([\"wasn't\"], dtype='<U6')],\n",
       "        [array(['way'], dtype='<U3')],\n",
       "        [array(['we'], dtype='<U2')],\n",
       "        [array(['wed'], dtype='<U3')],\n",
       "        [array(['welcome'], dtype='<U7')],\n",
       "        [array([\"we'll\"], dtype='<U5')],\n",
       "        [array(['went'], dtype='<U4')],\n",
       "        [array(['were'], dtype='<U4')],\n",
       "        [array([\"weren't\"], dtype='<U7')],\n",
       "        [array([\"we've\"], dtype='<U5')],\n",
       "        [array(['what'], dtype='<U4')],\n",
       "        [array(['whatever'], dtype='<U8')],\n",
       "        [array([\"what'll\"], dtype='<U7')],\n",
       "        [array(['whats'], dtype='<U5')],\n",
       "        [array(['when'], dtype='<U4')],\n",
       "        [array(['whence'], dtype='<U6')],\n",
       "        [array(['whenever'], dtype='<U8')],\n",
       "        [array(['where'], dtype='<U5')],\n",
       "        [array(['whereafter'], dtype='<U10')],\n",
       "        [array(['whereas'], dtype='<U7')],\n",
       "        [array(['whereby'], dtype='<U7')],\n",
       "        [array(['wherein'], dtype='<U7')],\n",
       "        [array(['wheres'], dtype='<U6')],\n",
       "        [array(['whereupon'], dtype='<U9')],\n",
       "        [array(['wherever'], dtype='<U8')],\n",
       "        [array(['whether'], dtype='<U7')],\n",
       "        [array(['which'], dtype='<U5')],\n",
       "        [array(['while'], dtype='<U5')],\n",
       "        [array(['whim'], dtype='<U4')],\n",
       "        [array(['whither'], dtype='<U7')],\n",
       "        [array(['who'], dtype='<U3')],\n",
       "        [array(['whod'], dtype='<U4')],\n",
       "        [array(['whoever'], dtype='<U7')],\n",
       "        [array(['whole'], dtype='<U5')],\n",
       "        [array([\"who'll\"], dtype='<U6')],\n",
       "        [array(['whom'], dtype='<U4')],\n",
       "        [array(['whomever'], dtype='<U8')],\n",
       "        [array(['whos'], dtype='<U4')],\n",
       "        [array(['whose'], dtype='<U5')],\n",
       "        [array(['why'], dtype='<U3')],\n",
       "        [array(['widely'], dtype='<U6')],\n",
       "        [array(['willing'], dtype='<U7')],\n",
       "        [array(['wish'], dtype='<U4')],\n",
       "        [array(['with'], dtype='<U4')],\n",
       "        [array(['within'], dtype='<U6')],\n",
       "        [array(['without'], dtype='<U7')],\n",
       "        [array([\"won't\"], dtype='<U5')],\n",
       "        [array(['words'], dtype='<U5')],\n",
       "        [array(['world'], dtype='<U5')],\n",
       "        [array(['would'], dtype='<U5')],\n",
       "        [array([\"wouldn't\"], dtype='<U8')],\n",
       "        [array(['www'], dtype='<U3')],\n",
       "        [array(['x'], dtype='<U1')],\n",
       "        [array(['y'], dtype='<U1')],\n",
       "        [array(['yes'], dtype='<U3')],\n",
       "        [array(['yet'], dtype='<U3')],\n",
       "        [array(['you'], dtype='<U3')],\n",
       "        [array(['youd'], dtype='<U4')],\n",
       "        [array([\"you'll\"], dtype='<U6')],\n",
       "        [array(['your'], dtype='<U4')],\n",
       "        [array(['youre'], dtype='<U5')],\n",
       "        [array(['yours'], dtype='<U5')],\n",
       "        [array(['yourself'], dtype='<U8')],\n",
       "        [array(['yourselves'], dtype='<U10')],\n",
       "        [array([\"you've\"], dtype='<U6')],\n",
       "        [array(['z'], dtype='<U1')],\n",
       "        [array(['zero'], dtype='<U4')]], dtype=object),\n",
       " 'nPvsT': <12499x1533 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 517180 stored elements in Compressed Sparse Column format>,\n",
       " 'nT': array([[array(['Finally'], dtype='<U7')],\n",
       "        [array(['actions'], dtype='<U7')],\n",
       "        [array(['analysis'], dtype='<U8')],\n",
       "        ...,\n",
       "        [array(['depth'], dtype='<U5')],\n",
       "        [array(['polynomial-time'], dtype='<U15')],\n",
       "        [array(['&#949'], dtype='<U5')]], dtype=object),\n",
       " 'CNormnPvsT': <12499x1533 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 517180 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormnPvsT': <12499x1533 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 517180 stored elements in Compressed Sparse Column format>,\n",
       " 'nnPvsT': <12499x767 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 185911 stored elements in Compressed Sparse Column format>,\n",
       " 'nnT': array([[array(['Search'], dtype='<U6')],\n",
       "        [array(['involved'], dtype='<U8')],\n",
       "        [array(['handling'], dtype='<U8')],\n",
       "        [array(['platform'], dtype='<U8')],\n",
       "        [array(['theorem'], dtype='<U7')],\n",
       "        [array(['conjecture'], dtype='<U10')],\n",
       "        [array(['streaming'], dtype='<U9')],\n",
       "        [array(['analyzed'], dtype='<U8')],\n",
       "        [array(['connectivity'], dtype='<U12')],\n",
       "        [array(['[1]'], dtype='<U3')],\n",
       "        [array(['vertex'], dtype='<U6')],\n",
       "        [array(['scenario'], dtype='<U8')],\n",
       "        [array(['plan'], dtype='<U4')],\n",
       "        [array(['versions'], dtype='<U8')],\n",
       "        [array(['identification'], dtype='<U14')],\n",
       "        [array(['optimizing'], dtype='<U10')],\n",
       "        [array(['runtime'], dtype='<U7')],\n",
       "        [array(['employ'], dtype='<U6')],\n",
       "        [array(['digital'], dtype='<U7')],\n",
       "        [array(['noise'], dtype='<U5')],\n",
       "        [array(['dimensionality'], dtype='<U14')],\n",
       "        [array(['superior'], dtype='<U8')],\n",
       "        [array(['keywords'], dtype='<U8')],\n",
       "        [array(['reported'], dtype='<U8')],\n",
       "        [array(['implementing'], dtype='<U12')],\n",
       "        [array(['layer'], dtype='<U5')],\n",
       "        [array(['label'], dtype='<U5')],\n",
       "        [array(['criteria'], dtype='<U8')],\n",
       "        [array(['personalized'], dtype='<U12')],\n",
       "        [array(['success'], dtype='<U7')],\n",
       "        [array(['separate'], dtype='<U8')],\n",
       "        [array(['variants'], dtype='<U8')],\n",
       "        [array(['integrate'], dtype='<U9')],\n",
       "        [array(['hold'], dtype='<U4')],\n",
       "        [array(['consisting'], dtype='<U10')],\n",
       "        [array(['allocation'], dtype='<U10')],\n",
       "        [array(['greatly'], dtype='<U7')],\n",
       "        [array(['interests'], dtype='<U9')],\n",
       "        [array(['unsupervised'], dtype='<U12')],\n",
       "        [array(['attribute'], dtype='<U9')],\n",
       "        [array(['earlier'], dtype='<U7')],\n",
       "        [array(['contributions'], dtype='<U13')],\n",
       "        [array(['coverage'], dtype='<U8')],\n",
       "        [array(['maximize'], dtype='<U8')],\n",
       "        [array(['remains'], dtype='<U7')],\n",
       "        [array(['valuable'], dtype='<U8')],\n",
       "        [array(['computations'], dtype='<U12')],\n",
       "        [array(['entity'], dtype='<U6')],\n",
       "        [array(['independently'], dtype='<U13')],\n",
       "        [array(['requiring'], dtype='<U9')],\n",
       "        [array(['modern'], dtype='<U6')],\n",
       "        [array(['dimensional'], dtype='<U11')],\n",
       "        [array(['establish'], dtype='<U9')],\n",
       "        [array(['recognition'], dtype='<U11')],\n",
       "        [array(['consistency'], dtype='<U11')],\n",
       "        [array(['connections'], dtype='<U11')],\n",
       "        [array(['desirable'], dtype='<U9')],\n",
       "        [array(['client'], dtype='<U6')],\n",
       "        [array(['involve'], dtype='<U7')],\n",
       "        [array(['regular'], dtype='<U7')],\n",
       "        [array(['project'], dtype='<U7')],\n",
       "        [array(['hybrid'], dtype='<U6')],\n",
       "        [array(['broad'], dtype='<U5')],\n",
       "        [array(['determined'], dtype='<U10')],\n",
       "        [array(['produced'], dtype='<U8')],\n",
       "        [array(['condition'], dtype='<U9')],\n",
       "        [array(['emerging'], dtype='<U8')],\n",
       "        [array(['completely'], dtype='<U10')],\n",
       "        [array(['implies'], dtype='<U7')],\n",
       "        [array(['gain'], dtype='<U4')],\n",
       "        [array(['architectures'], dtype='<U13')],\n",
       "        [array(['poor'], dtype='<U4')],\n",
       "        [array(['connection'], dtype='<U10')],\n",
       "        [array(['transmission'], dtype='<U12')],\n",
       "        [array(['communities'], dtype='<U11')],\n",
       "        [array(['employs'], dtype='<U7')],\n",
       "        [array(['requirement'], dtype='<U11')],\n",
       "        [array(['latency'], dtype='<U7')],\n",
       "        [array(['reliable'], dtype='<U8')],\n",
       "        [array(['exhibit'], dtype='<U7')],\n",
       "        [array(['minimizing'], dtype='<U10')],\n",
       "        [array(['heuristics'], dtype='<U10')],\n",
       "        [array(['personal'], dtype='<U8')],\n",
       "        [array(['delay'], dtype='<U5')],\n",
       "        [array(['exactly'], dtype='<U7')],\n",
       "        [array(['representative'], dtype='<U14')],\n",
       "        [array(['generates'], dtype='<U9')],\n",
       "        [array(['NP-hard'], dtype='<U7')],\n",
       "        [array(['designing'], dtype='<U9')],\n",
       "        [array(['workload'], dtype='<U8')],\n",
       "        [array(['appear'], dtype='<U6')],\n",
       "        [array(['captures'], dtype='<U8')],\n",
       "        [array(['unknown'], dtype='<U7')],\n",
       "        [array(['tested'], dtype='<U6')],\n",
       "        [array(['regions'], dtype='<U7')],\n",
       "        [array(['expressed'], dtype='<U9')],\n",
       "        [array(['integer'], dtype='<U7')],\n",
       "        [array(['locations'], dtype='<U9')],\n",
       "        [array(['categories'], dtype='<U10')],\n",
       "        [array(['embedded'], dtype='<U8')],\n",
       "        [array(['finds'], dtype='<U5')],\n",
       "        [array(['successful'], dtype='<U10')],\n",
       "        [array(['capable'], dtype='<U7')],\n",
       "        [array(['Bayesian'], dtype='<U8')],\n",
       "        [array(['deployment'], dtype='<U10')],\n",
       "        [array(['abstract'], dtype='<U8')],\n",
       "        [array(['discovered'], dtype='<U10')],\n",
       "        [array(['vectors'], dtype='<U7')],\n",
       "        [array(['findings'], dtype='<U8')],\n",
       "        [array(['utilize'], dtype='<U7')],\n",
       "        [array(['implicit'], dtype='<U8')],\n",
       "        [array(['experimentally'], dtype='<U14')],\n",
       "        [array(['changing'], dtype='<U8')],\n",
       "        [array(['extends'], dtype='<U7')],\n",
       "        [array(['virtual'], dtype='<U7')],\n",
       "        [array(['adapt'], dtype='<U5')],\n",
       "        [array(['requests'], dtype='<U8')],\n",
       "        [array(['feasible'], dtype='<U8')],\n",
       "        [array(['inputs'], dtype='<U6')],\n",
       "        [array(['empirically'], dtype='<U11')],\n",
       "        [array(['supervised'], dtype='<U10')],\n",
       "        [array(['spaces'], dtype='<U6')],\n",
       "        [array(['increased'], dtype='<U9')],\n",
       "        [array(['clients'], dtype='<U7')],\n",
       "        [array(['influence'], dtype='<U9')],\n",
       "        [array(['computationally'], dtype='<U15')],\n",
       "        [array(['association'], dtype='<U11')],\n",
       "        [array(['media'], dtype='<U5')],\n",
       "        [array(['sensor'], dtype='<U6')],\n",
       "        [array(['motivated'], dtype='<U9')],\n",
       "        [array(['indexes'], dtype='<U7')],\n",
       "        [array(['functionality'], dtype='<U13')],\n",
       "        [array(['recommendation'], dtype='<U14')],\n",
       "        [array(['inherent'], dtype='<U8')],\n",
       "        [array(['summarization'], dtype='<U13')],\n",
       "        [array(['constraint'], dtype='<U10')],\n",
       "        [array(['central'], dtype='<U7')],\n",
       "        [array(['maintenance'], dtype='<U11')],\n",
       "        [array(['allowed'], dtype='<U7')],\n",
       "        [array(['negative'], dtype='<U8')],\n",
       "        [array(['proved'], dtype='<U6')],\n",
       "        [array(['meaningful'], dtype='<U10')],\n",
       "        [array(['facilitate'], dtype='<U10')],\n",
       "        [array(['bits'], dtype='<U4')],\n",
       "        [array(['score'], dtype='<U5')],\n",
       "        [array(['topology'], dtype='<U8')],\n",
       "        [array(['manually'], dtype='<U8')],\n",
       "        [array(['subject'], dtype='<U7')],\n",
       "        [array(['lists'], dtype='<U5')],\n",
       "        [array(['logic'], dtype='<U5')],\n",
       "        [array(['event'], dtype='<U5')],\n",
       "        [array(['selecting'], dtype='<U9')],\n",
       "        [array(['reasonable'], dtype='<U10')],\n",
       "        [array(['heuristic'], dtype='<U9')],\n",
       "        [array(['channel'], dtype='<U7')],\n",
       "        [array(['interfaces'], dtype='<U10')],\n",
       "        [array(['external'], dtype='<U8')],\n",
       "        [array(['understand'], dtype='<U10')],\n",
       "        [array(['extremely'], dtype='<U9')],\n",
       "        [array(['received'], dtype='<U8')],\n",
       "        [array(['Retrieval'], dtype='<U9')],\n",
       "        [array(['choice'], dtype='<U6')],\n",
       "        [array(['formulation'], dtype='<U11')],\n",
       "        [array(['products'], dtype='<U8')],\n",
       "        [array(['fields'], dtype='<U6')],\n",
       "        [array(['policy'], dtype='<U6')],\n",
       "        [array(['infrastructure'], dtype='<U14')],\n",
       "        [array(['validate'], dtype='<U8')],\n",
       "        [array(['samples'], dtype='<U7')],\n",
       "        [array(['aim'], dtype='<U3')],\n",
       "        [array(['named'], dtype='<U5')],\n",
       "        [array(['retrieve'], dtype='<U8')],\n",
       "        [array(['packets'], dtype='<U7')],\n",
       "        [array(['incremental'], dtype='<U11')],\n",
       "        [array(['offer'], dtype='<U5')],\n",
       "        [array(['matches'], dtype='<U7')],\n",
       "        [array(['comprehensive'], dtype='<U13')],\n",
       "        [array(['representing'], dtype='<U12')],\n",
       "        [array(['retrieved'], dtype='<U9')],\n",
       "        [array(['considering'], dtype='<U11')],\n",
       "        [array(['scores'], dtype='<U6')],\n",
       "        [array(['automated'], dtype='<U9')],\n",
       "        [array(['deployed'], dtype='<U8')],\n",
       "        [array(['availability'], dtype='<U12')],\n",
       "        [array(['caching'], dtype='<U7')],\n",
       "        [array(['candidate'], dtype='<U9')],\n",
       "        [array(['--'], dtype='<U2')],\n",
       "        [array(['visual'], dtype='<U6')],\n",
       "        [array(['contents'], dtype='<U8')],\n",
       "        [array(['decisions'], dtype='<U9')],\n",
       "        [array(['expressions'], dtype='<U11')],\n",
       "        [array(['gap'], dtype='<U3')],\n",
       "        [array(['aims'], dtype='<U4')],\n",
       "        [array(['generic'], dtype='<U7')],\n",
       "        [array(['partitioning'], dtype='<U12')],\n",
       "        [array(['pair'], dtype='<U4')],\n",
       "        [array(['employed'], dtype='<U8')],\n",
       "        [array(['parts'], dtype='<U5')],\n",
       "        [array(['direct'], dtype='<U6')],\n",
       "        [array(['algorithmic'], dtype='<U11')],\n",
       "        [array(['dimension'], dtype='<U9')],\n",
       "        [array(['true'], dtype='<U4')],\n",
       "        [array(['achieving'], dtype='<U9')],\n",
       "        [array(['frequently'], dtype='<U10')],\n",
       "        [array(['keyword'], dtype='<U7')],\n",
       "        [array(['dimensions'], dtype='<U10')],\n",
       "        [array(['involving'], dtype='<U9')],\n",
       "        [array(['on-line'], dtype='<U7')],\n",
       "        [array(['detecting'], dtype='<U9')],\n",
       "        [array(['estimating'], dtype='<U10')],\n",
       "        [array(['commonly'], dtype='<U8')],\n",
       "        [array(['differences'], dtype='<U11')],\n",
       "        [array(['Existing'], dtype='<U8')],\n",
       "        [array(['incorporate'], dtype='<U11')],\n",
       "        [array(['experiment'], dtype='<U10')],\n",
       "        [array(['desired'], dtype='<U7')],\n",
       "        [array(['extensions'], dtype='<U10')],\n",
       "        [array(['offers'], dtype='<U6')],\n",
       "        [array(['privacy'], dtype='<U7')],\n",
       "        [array(['optimize'], dtype='<U8')],\n",
       "        [array(['unified'], dtype='<U7')],\n",
       "        [array(['transaction'], dtype='<U11')],\n",
       "        [array(['description'], dtype='<U11')],\n",
       "        [array(['kinds'], dtype='<U5')],\n",
       "        [array(['store'], dtype='<U5')],\n",
       "        [array(['effects'], dtype='<U7')],\n",
       "        [array(['real-time'], dtype='<U9')],\n",
       "        [array(['expansion'], dtype='<U9')],\n",
       "        [array(['numbers'], dtype='<U7')],\n",
       "        [array(['growing'], dtype='<U7')],\n",
       "        [array(['forms'], dtype='<U5')],\n",
       "        [array(['variable'], dtype='<U8')],\n",
       "        [array(['naturally'], dtype='<U9')],\n",
       "        [array(['determining'], dtype='<U11')],\n",
       "        [array(['regression'], dtype='<U10')],\n",
       "        [array(['aggregate'], dtype='<U9')],\n",
       "        [array(['avoid'], dtype='<U5')],\n",
       "        [array(['combinatorial'], dtype='<U13')],\n",
       "        [array(['aggregation'], dtype='<U11')],\n",
       "        [array(['threshold'], dtype='<U9')],\n",
       "        [array(['labels'], dtype='<U6')],\n",
       "        [array(['recall'], dtype='<U6')],\n",
       "        [array(['weight'], dtype='<U6')],\n",
       "        [array(['involves'], dtype='<U8')],\n",
       "        [array(['orders'], dtype='<U6')],\n",
       "        [array(['created'], dtype='<U7')],\n",
       "        [array(['performing'], dtype='<U10')],\n",
       "        [array(['measured'], dtype='<U8')],\n",
       "        [array(['diverse'], dtype='<U7')],\n",
       "        [array(['collaborative'], dtype='<U13')],\n",
       "        [array(['great'], dtype='<U5')],\n",
       "        [array(['implementations'], dtype='<U15')],\n",
       "        [array(['measurements'], dtype='<U12')],\n",
       "        [array(['runs'], dtype='<U4')],\n",
       "        [array(['researchers'], dtype='<U11')],\n",
       "        [array(['message'], dtype='<U7')],\n",
       "        [array(['correlation'], dtype='<U11')],\n",
       "        [array(['comparable'], dtype='<U10')],\n",
       "        [array(['characterize'], dtype='<U12')],\n",
       "        [array(['estimates'], dtype='<U9')],\n",
       "        [array(['Mining'], dtype='<U6')],\n",
       "        [array(['solved'], dtype='<U6')],\n",
       "        [array(['textual'], dtype='<U7')],\n",
       "        [array(['monitoring'], dtype='<U10')],\n",
       "        [array(['identified'], dtype='<U10')],\n",
       "        [array(['existence'], dtype='<U9')],\n",
       "        [array(['constructing'], dtype='<U12')],\n",
       "        [array(['fraction'], dtype='<U8')],\n",
       "        [array(['exploits'], dtype='<U8')],\n",
       "        [array(['operators'], dtype='<U9')],\n",
       "        [array(['maintain'], dtype='<U8')],\n",
       "        [array(['cache'], dtype='<U5')],\n",
       "        [array(['representations'], dtype='<U15')],\n",
       "        [array(['images'], dtype='<U6')],\n",
       "        [array(['extracting'], dtype='<U10')],\n",
       "        [array(['generalized'], dtype='<U11')],\n",
       "        [array(['chosen'], dtype='<U6')],\n",
       "        [array(['table'], dtype='<U5')],\n",
       "        [array(['yield'], dtype='<U5')],\n",
       "        [array(['limitations'], dtype='<U11')],\n",
       "        [array(['ideas'], dtype='<U5')],\n",
       "        [array(['hidden'], dtype='<U6')],\n",
       "        [array(['physical'], dtype='<U8')],\n",
       "        [array(['discussed'], dtype='<U9')],\n",
       "        [array(['capabilities'], dtype='<U12')],\n",
       "        [array(['areas'], dtype='<U5')],\n",
       "        [array(['core'], dtype='<U4')],\n",
       "        [array(['news'], dtype='<U4')],\n",
       "        [array(['al'], dtype='<U2')],\n",
       "        [array(['lack'], dtype='<U4')],\n",
       "        [array(['definition'], dtype='<U10')],\n",
       "        [array(['sizes'], dtype='<U5')],\n",
       "        [array(['kernel'], dtype='<U6')],\n",
       "        [array(['produces'], dtype='<U8')],\n",
       "        [array(['maintaining'], dtype='<U11')],\n",
       "        [array(['&gt'], dtype='<U3')],\n",
       "        [array(['contribution'], dtype='<U12')],\n",
       "        [array(['satisfy'], dtype='<U7')],\n",
       "        [array(['ad'], dtype='<U2')],\n",
       "        [array(['policies'], dtype='<U8')],\n",
       "        [array(['capacity'], dtype='<U8')],\n",
       "        [array(['applicable'], dtype='<U10')],\n",
       "        [array(['correct'], dtype='<U7')],\n",
       "        [array(['rule'], dtype='<U4')],\n",
       "        [array(['directed'], dtype='<U8')],\n",
       "        [array(['site'], dtype='<U4')],\n",
       "        [array(['constructed'], dtype='<U11')],\n",
       "        [array(['comparing'], dtype='<U9')],\n",
       "        [array(['processor'], dtype='<U9')],\n",
       "        [array(['browsing'], dtype='<U8')],\n",
       "        [array(['assumptions'], dtype='<U11')],\n",
       "        [array(['sequences'], dtype='<U9')],\n",
       "        [array(['transactions'], dtype='<U12')],\n",
       "        [array(['ranked'], dtype='<U6')],\n",
       "        [array(['spatial'], dtype='<U7')],\n",
       "        [array(['tables'], dtype='<U6')],\n",
       "        [array(['inference'], dtype='<U9')],\n",
       "        [array(['restricted'], dtype='<U10')],\n",
       "        [array(['technologies'], dtype='<U12')],\n",
       "        [array(['Learning'], dtype='<U8')],\n",
       "        [array(['outperform'], dtype='<U10')],\n",
       "        [array(['interactions'], dtype='<U12')],\n",
       "        [array(['generally'], dtype='<U9')],\n",
       "        [array(['join'], dtype='<U4')],\n",
       "        [array(['learned'], dtype='<U7')],\n",
       "        [array(['simulations'], dtype='<U11')],\n",
       "        [array(['distinct'], dtype='<U8')],\n",
       "        [array(['testing'], dtype='<U7')],\n",
       "        [array(['essential'], dtype='<U9')],\n",
       "        [array(['paradigm'], dtype='<U8')],\n",
       "        [array(['views'], dtype='<U5')],\n",
       "        [array(['records'], dtype='<U7')],\n",
       "        [array(['applying'], dtype='<U8')],\n",
       "        [array(['logs'], dtype='<U4')],\n",
       "        [array(['typical'], dtype='<U7')],\n",
       "        [array(['smaller'], dtype='<U7')],\n",
       "        [array(['frequency'], dtype='<U9')],\n",
       "        [array(['effort'], dtype='<U6')],\n",
       "        [array(['disk'], dtype='<U4')],\n",
       "        [array(['hardware'], dtype='<U8')],\n",
       "        [array(['argue'], dtype='<U5')],\n",
       "        [array(['settings'], dtype='<U8')],\n",
       "        [array(['developing'], dtype='<U10')],\n",
       "        [array(['purpose'], dtype='<U7')],\n",
       "        [array(['messages'], dtype='<U8')],\n",
       "        [array(['exists'], dtype='<U6')],\n",
       "        [array(['demonstrates'], dtype='<U12')],\n",
       "        [array(['weights'], dtype='<U7')],\n",
       "        [array(['share'], dtype='<U5')],\n",
       "        [array(['evidence'], dtype='<U8')],\n",
       "        [array(['SQL'], dtype='<U3')],\n",
       "        [array(['Markov'], dtype='<U6')],\n",
       "        [array(['Query'], dtype='<U5')],\n",
       "        [array(['entities'], dtype='<U8')],\n",
       "        [array(['easy'], dtype='<U4')],\n",
       "        [array(['utility'], dtype='<U7')],\n",
       "        [array(['active'], dtype='<U6')],\n",
       "        [array(['reduced'], dtype='<U7')],\n",
       "        [array(['includes'], dtype='<U8')],\n",
       "        [array(['ii'], dtype='<U2')],\n",
       "        [array(['collected'], dtype='<U9')],\n",
       "        [array(['guarantee'], dtype='<U9')],\n",
       "        [array(['reducing'], dtype='<U8')],\n",
       "        [array(['demonstrated'], dtype='<U12')],\n",
       "        [array(['Semantic'], dtype='<U8')],\n",
       "        [array(['devices'], dtype='<U7')],\n",
       "        [array(['servers'], dtype='<U7')],\n",
       "        [array(['role'], dtype='<U4')],\n",
       "        [array(['accurately'], dtype='<U10')],\n",
       "        [array(['selected'], dtype='<U8')],\n",
       "        [array(['business'], dtype='<U8')],\n",
       "        [array(['exponential'], dtype='<U11')],\n",
       "        [array(['file'], dtype='<U4')],\n",
       "        [array(['increases'], dtype='<U9')],\n",
       "        [array(['hierarchy'], dtype='<U9')],\n",
       "        [array(['hand'], dtype='<U4')],\n",
       "        [array(['crucial'], dtype='<U7')],\n",
       "        [array(['usage'], dtype='<U5')],\n",
       "        [array(['classifiers'], dtype='<U11')],\n",
       "        [array(['errors'], dtype='<U6')],\n",
       "        [array(['word'], dtype='<U4')],\n",
       "        [array(['relation'], dtype='<U8')],\n",
       "        [array(['exploiting'], dtype='<U10')],\n",
       "        [array(['formal'], dtype='<U6')],\n",
       "        [array(['analyzing'], dtype='<U9')],\n",
       "        [array(['mapping'], dtype='<U7')],\n",
       "        [array(['consistent'], dtype='<U10')],\n",
       "        [array(['steps'], dtype='<U5')],\n",
       "        [array(['create'], dtype='<U6')],\n",
       "        [array(['procedure'], dtype='<U9')],\n",
       "        [array(['select'], dtype='<U6')],\n",
       "        [array(['baseline'], dtype='<U8')],\n",
       "        [array(['sufficient'], dtype='<U10')],\n",
       "        [array(['uniform'], dtype='<U7')],\n",
       "        [array(['tight'], dtype='<U5')],\n",
       "        [array(['series'], dtype='<U6')],\n",
       "        [array(['combine'], dtype='<U7')],\n",
       "        [array(['combines'], dtype='<U8')],\n",
       "        [array(['sparse'], dtype='<U6')],\n",
       "        [array(['promising'], dtype='<U9')],\n",
       "        [array(['equivalent'], dtype='<U10')],\n",
       "        [array(['rates'], dtype='<U5')],\n",
       "        [array(['static'], dtype='<U6')],\n",
       "        [array(['operating'], dtype='<U9')],\n",
       "        [array(['Previous'], dtype='<U8')],\n",
       "        [array(['match'], dtype='<U5')],\n",
       "        [array(['explicitly'], dtype='<U10')],\n",
       "        [array(['rich'], dtype='<U4')],\n",
       "        [array(['minimal'], dtype='<U7')],\n",
       "        [array(['binary'], dtype='<U6')],\n",
       "        [array(['illustrate'], dtype='<U10')],\n",
       "        [array(['statistics'], dtype='<U10')],\n",
       "        [array(['entire'], dtype='<U6')],\n",
       "        [array(['continuous'], dtype='<U10')],\n",
       "        [array(['generating'], dtype='<U10')],\n",
       "        [array(['methodology'], dtype='<U11')],\n",
       "        [array(['focused'], dtype='<U7')],\n",
       "        [array(['benefit'], dtype='<U7')],\n",
       "        [array(['benchmark'], dtype='<U9')],\n",
       "        [array(['family'], dtype='<U6')],\n",
       "        [array(['introduces'], dtype='<U10')],\n",
       "        [array(['generalization'], dtype='<U14')],\n",
       "        [array(['deal'], dtype='<U4')],\n",
       "        [array(['observed'], dtype='<U8')],\n",
       "        [array(['security'], dtype='<U8')],\n",
       "        [array(['events'], dtype='<U6')],\n",
       "        [array(['connected'], dtype='<U9')],\n",
       "        [array(['partial'], dtype='<U7')],\n",
       "        [array(['evaluating'], dtype='<U10')],\n",
       "        [array(['explicit'], dtype='<U8')],\n",
       "        [array(['Efficient'], dtype='<U9')],\n",
       "        [array(['frequent'], dtype='<U8')],\n",
       "        [array(['assume'], dtype='<U6')],\n",
       "        [array(['field'], dtype='<U5')],\n",
       "        [array(['loss'], dtype='<U4')],\n",
       "        [array(['unique'], dtype='<U6')],\n",
       "        [array(['computed'], dtype='<U8')],\n",
       "        [array(['actual'], dtype='<U6')],\n",
       "        [array(['rely'], dtype='<U4')],\n",
       "        [array(['solving'], dtype='<U7')],\n",
       "        [array(['exact'], dtype='<U5')],\n",
       "        [array(['flexible'], dtype='<U8')],\n",
       "        [array(['extracted'], dtype='<U9')],\n",
       "        [array(['competitive'], dtype='<U11')],\n",
       "        [array(['advantages'], dtype='<U10')],\n",
       "        [array(['sampling'], dtype='<U8')],\n",
       "        [array(['load'], dtype='<U4')],\n",
       "        [array(['schema'], dtype='<U6')],\n",
       "        [array(['represented'], dtype='<U11')],\n",
       "        [array(['heterogeneous'], dtype='<U13')],\n",
       "        [array(['conventional'], dtype='<U12')],\n",
       "        [array(['predict'], dtype='<U7')],\n",
       "        [array(['assumption'], dtype='<U10')],\n",
       "        [array(['image'], dtype='<U5')],\n",
       "        [array(['querying'], dtype='<U8')],\n",
       "        [array(['answering'], dtype='<U9')],\n",
       "        [array(['depends'], dtype='<U7')],\n",
       "        [array(['detailed'], dtype='<U8')],\n",
       "        [array(['bandwidth'], dtype='<U9')],\n",
       "        [array(['classical'], dtype='<U9')],\n",
       "        [array(['labeled'], dtype='<U7')],\n",
       "        [array(['objective'], dtype='<U9')],\n",
       "        [array(['ways'], dtype='<U4')],\n",
       "        [array(['well-known'], dtype='<U10')],\n",
       "        [array(['5'], dtype='<U1')],\n",
       "        [array(['packet'], dtype='<U6')],\n",
       "        [array(['sequential'], dtype='<U10')],\n",
       "        [array(['supporting'], dtype='<U10')],\n",
       "        [array(['levels'], dtype='<U6')],\n",
       "        [array(['experience'], dtype='<U10')],\n",
       "        [array(['understanding'], dtype='<U13')],\n",
       "        [array(['magnitude'], dtype='<U9')],\n",
       "        [array(['detect'], dtype='<U6')],\n",
       "        [array(['weighted'], dtype='<U8')],\n",
       "        [array(['hard'], dtype='<U4')],\n",
       "        [array(['minimize'], dtype='<U8')],\n",
       "        [array(['parameter'], dtype='<U9')],\n",
       "        [array(['sense'], dtype='<U5')],\n",
       "        [array(['reduces'], dtype='<U7')],\n",
       "        [array(['IR'], dtype='<U2')],\n",
       "        [array(['base'], dtype='<U4')],\n",
       "        [array(['strong'], dtype='<U6')],\n",
       "        [array(['corpus'], dtype='<U6')],\n",
       "        [array(['product'], dtype='<U7')],\n",
       "        [array(['location'], dtype='<U8')],\n",
       "        [array(['variables'], dtype='<U9')],\n",
       "        [array(['benefits'], dtype='<U8')],\n",
       "        [array(['expensive'], dtype='<U9')],\n",
       "        [array(['edge'], dtype='<U4')],\n",
       "        [array(['currently'], dtype='<U9')],\n",
       "        [array(['people'], dtype='<U6')],\n",
       "        [array(['exist'], dtype='<U5')],\n",
       "        [array(['allowing'], dtype='<U8')],\n",
       "        [array(['structural'], dtype='<U10')],\n",
       "        [array(['close'], dtype='<U5')],\n",
       "        [array(['classifier'], dtype='<U10')],\n",
       "        [array(['larger'], dtype='<U6')],\n",
       "        [array(['addresses'], dtype='<U9')],\n",
       "        [array(['sample'], dtype='<U6')],\n",
       "        [array(['flow'], dtype='<U4')],\n",
       "        [array(['component'], dtype='<U9')],\n",
       "        [array(['combining'], dtype='<U9')],\n",
       "        [array(['groups'], dtype='<U6')],\n",
       "        [array(['discover'], dtype='<U8')],\n",
       "        [array(['group'], dtype='<U5')],\n",
       "        [array(['questions'], dtype='<U9')],\n",
       "        [array(['speed'], dtype='<U5')],\n",
       "        [array(['Data'], dtype='<U4')],\n",
       "        [array(['examine'], dtype='<U7')],\n",
       "        [array(['scenarios'], dtype='<U9')],\n",
       "        [array(['pairs'], dtype='<U5')],\n",
       "        [array(['yields'], dtype='<U6')],\n",
       "        [array(['dynamically'], dtype='<U11')],\n",
       "        [array(['suitable'], dtype='<U8')],\n",
       "        [array(['adaptive'], dtype='<U8')],\n",
       "        [array(['interactive'], dtype='<U11')],\n",
       "        [array(['finite'], dtype='<U6')],\n",
       "        [array(['combined'], dtype='<U8')],\n",
       "        [array(['change'], dtype='<U6')],\n",
       "        [array(['temporal'], dtype='<U8')],\n",
       "        [array(['TREC'], dtype='<U4')],\n",
       "        [array(['sharing'], dtype='<U7')],\n",
       "        [array(['presence'], dtype='<U8')],\n",
       "        [array(['instance'], dtype='<U8')],\n",
       "        [array(['mobile'], dtype='<U6')],\n",
       "        [array(['paths'], dtype='<U5')],\n",
       "        [array(['code'], dtype='<U4')],\n",
       "        [array(['extension'], dtype='<U9')],\n",
       "        [array(['powerful'], dtype='<U8')],\n",
       "        [array(['increasingly'], dtype='<U12')],\n",
       "        [array(['metrics'], dtype='<U7')],\n",
       "        [array(['instances'], dtype='<U9')],\n",
       "        [array(['critical'], dtype='<U8')],\n",
       "        [array(['positive'], dtype='<U8')],\n",
       "        [array(['leads'], dtype='<U5')],\n",
       "        [array(['conducted'], dtype='<U9')],\n",
       "        [array(['consists'], dtype='<U8')],\n",
       "        [array(['output'], dtype='<U6')],\n",
       "        [array(['takes'], dtype='<U5')],\n",
       "        [array(['fully'], dtype='<U5')],\n",
       "        [array(['stored'], dtype='<U6')],\n",
       "        [array(['Second'], dtype='<U6')],\n",
       "        [array(['manner'], dtype='<U6')],\n",
       "        [array(['proof'], dtype='<U5')],\n",
       "        [array(['relations'], dtype='<U9')],\n",
       "        [array(['attention'], dtype='<U9')],\n",
       "        [array(['future'], dtype='<U6')],\n",
       "        [array(['interaction'], dtype='<U11')],\n",
       "        [array(['items'], dtype='<U5')],\n",
       "        [array(['basis'], dtype='<U5')],\n",
       "        [array(['vertices'], dtype='<U8')],\n",
       "        [array(['elements'], dtype='<U8')],\n",
       "        [array(['costs'], dtype='<U5')],\n",
       "        [array(['streams'], dtype='<U7')],\n",
       "        [array(['challenge'], dtype='<U9')],\n",
       "        [array(['lead'], dtype='<U4')],\n",
       "        [array(['processors'], dtype='<U10')],\n",
       "        [array(['report'], dtype='<U6')],\n",
       "        [array(['extract'], dtype='<U7')],\n",
       "        [array(['operation'], dtype='<U9')],\n",
       "        [array(['prior'], dtype='<U5')],\n",
       "        [array(['searching'], dtype='<U9')],\n",
       "        [array(['full'], dtype='<U4')],\n",
       "        [array(['commercial'], dtype='<U10')],\n",
       "        [array(['technology'], dtype='<U10')],\n",
       "        [array(['built'], dtype='<U5')],\n",
       "        [array(['stream'], dtype='<U6')],\n",
       "        [array(['estimation'], dtype='<U10')],\n",
       "        [array(['list'], dtype='<U4')],\n",
       "        [array(['relationship'], dtype='<U12')],\n",
       "        [array(['answers'], dtype='<U7')],\n",
       "        [array(['indicate'], dtype='<U8')],\n",
       "        [array(['updates'], dtype='<U7')],\n",
       "        [array(['challenging'], dtype='<U11')],\n",
       "        [array(['works'], dtype='<U5')],\n",
       "        [array(['machines'], dtype='<U8')],\n",
       "        [array(['program'], dtype='<U7')],\n",
       "        [array(['integrated'], dtype='<U10')],\n",
       "        [array(['scheduling'], dtype='<U10')],\n",
       "        [array(['target'], dtype='<U6')],\n",
       "        [array(['randomized'], dtype='<U10')],\n",
       "        [array(['update'], dtype='<U6')],\n",
       "        [array(['call'], dtype='<U4')],\n",
       "        [array(['throughput'], dtype='<U10')],\n",
       "        [array(['factors'], dtype='<U7')],\n",
       "        [array(['processes'], dtype='<U9')],\n",
       "        [array(['showing'], dtype='<U7')],\n",
       "        [array(['enable'], dtype='<U6')],\n",
       "        [array(['shared'], dtype='<U6')],\n",
       "        [array(['filtering'], dtype='<U9')],\n",
       "        [array(['prediction'], dtype='<U10')],\n",
       "        [array(['short'], dtype='<U5')],\n",
       "        [array(['nature'], dtype='<U6')],\n",
       "        [array(['proposes'], dtype='<U8')],\n",
       "        [array(['human'], dtype='<U5')],\n",
       "        [array(['reduction'], dtype='<U9')],\n",
       "        [array(['simultaneously'], dtype='<U14')],\n",
       "        [array(['Based'], dtype='<U5')],\n",
       "        [array(['server'], dtype='<U6')],\n",
       "        [array(['relative'], dtype='<U8')],\n",
       "        [array(['object'], dtype='<U6')],\n",
       "        [array(['identifying'], dtype='<U11')],\n",
       "        [array(['guarantees'], dtype='<U10')],\n",
       "        [array(['top'], dtype='<U3')],\n",
       "        [array(['simulation'], dtype='<U10')],\n",
       "        [array(['practice'], dtype='<U8')],\n",
       "        [array(['ratio'], dtype='<U5')],\n",
       "        [array(['initial'], dtype='<U7')],\n",
       "        [array(['idea'], dtype='<U4')],\n",
       "        [array(['capture'], dtype='<U7')],\n",
       "        [array(['rank'], dtype='<U4')],\n",
       "        [array(['implement'], dtype='<U9')],\n",
       "        [array(['derive'], dtype='<U6')],\n",
       "        [array(['hierarchical'], dtype='<U12')],\n",
       "        [array(['handle'], dtype='<U6')],\n",
       "        [array(['community'], dtype='<U9')],\n",
       "        [array(['literature'], dtype='<U10')],\n",
       "        [array(['described'], dtype='<U9')],\n",
       "        [array(['4'], dtype='<U1')],\n",
       "        [array(['faster'], dtype='<U6')],\n",
       "        [array(['resource'], dtype='<U8')],\n",
       "        [array(['issue'], dtype='<U5')],\n",
       "        [array(['environments'], dtype='<U12')],\n",
       "        [array(['matrix'], dtype='<U6')],\n",
       "        [array(['area'], dtype='<U4')],\n",
       "        [array(['distributions'], dtype='<U13')],\n",
       "        [array(['account'], dtype='<U7')],\n",
       "        [array(['concepts'], dtype='<U8')],\n",
       "        [array(['attributes'], dtype='<U10')],\n",
       "        [array(['aspects'], dtype='<U7')],\n",
       "        [array(['mechanisms'], dtype='<U10')],\n",
       "        [array(['synthetic'], dtype='<U9')],\n",
       "        [array(['cluster'], dtype='<U7')],\n",
       "        [array(['learn'], dtype='<U5')],\n",
       "        [array(['metric'], dtype='<U6')],\n",
       "        [array(['response'], dtype='<U8')],\n",
       "        [array(['contrast'], dtype='<U8')],\n",
       "        [array(['integration'], dtype='<U11')],\n",
       "        [array(['structured'], dtype='<U10')],\n",
       "        [array(['upper'], dtype='<U5')],\n",
       "        [array(['bounded'], dtype='<U7')],\n",
       "        [array(['trees'], dtype='<U5')],\n",
       "        [array(['extended'], dtype='<U8')],\n",
       "        [array(['programs'], dtype='<U8')],\n",
       "        [array(['sites'], dtype='<U5')],\n",
       "        [array(['discovery'], dtype='<U9')],\n",
       "        [array(['edges'], dtype='<U5')],\n",
       "        [array(['estimate'], dtype='<U8')],\n",
       "        [array(['clusters'], dtype='<U8')],\n",
       "        [array(['produce'], dtype='<U7')],\n",
       "        [array(['advantage'], dtype='<U9')],\n",
       "        [array(['overhead'], dtype='<U8')],\n",
       "        [array(['generation'], dtype='<U10')],\n",
       "        [array(['deterministic'], dtype='<U13')],\n",
       "        [array(['exploit'], dtype='<U7')],\n",
       "        [array(['computer'], dtype='<U8')],\n",
       "        [array(['property'], dtype='<U8')],\n",
       "        [array(['needed'], dtype='<U6')],\n",
       "        [array(['corresponding'], dtype='<U13')],\n",
       "        [array(['feedback'], dtype='<U8')],\n",
       "        [array(['derived'], dtype='<U7')],\n",
       "        [array(['performs'], dtype='<U8')],\n",
       "        [array(['conditions'], dtype='<U10')],\n",
       "        [array(['comparison'], dtype='<U10')],\n",
       "        [array(['represent'], dtype='<U9')],\n",
       "        [array(['appropriate'], dtype='<U11')],\n",
       "        [array(['long'], dtype='<U4')],\n",
       "        [array(['vector'], dtype='<U6')],\n",
       "        [array(['development'], dtype='<U11')],\n",
       "        [array(['topics'], dtype='<U6')],\n",
       "        [array(['dataset'], dtype='<U7')],\n",
       "        [array(['robust'], dtype='<U6')],\n",
       "        [array(['links'], dtype='<U5')],\n",
       "        [array(['easily'], dtype='<U6')],\n",
       "        [array(['semantics'], dtype='<U9')],\n",
       "        [array(['wireless'], dtype='<U8')],\n",
       "        [array(['scalability'], dtype='<U11')],\n",
       "        [array(['version'], dtype='<U7')],\n",
       "        [array(['components'], dtype='<U10')],\n",
       "        [array(['interface'], dtype='<U9')],\n",
       "        [array(['large-scale'], dtype='<U11')],\n",
       "        [array(['subset'], dtype='<U6')],\n",
       "        [array(['theoretical'], dtype='<U11')],\n",
       "        [array(['extraction'], dtype='<U10')],\n",
       "        [array(['improving'], dtype='<U9')],\n",
       "        [array(['performed'], dtype='<U9')],\n",
       "        [array(['pattern'], dtype='<U7')],\n",
       "        [array(['evaluated'], dtype='<U9')],\n",
       "        [array(['supports'], dtype='<U8')],\n",
       "        [array(['term'], dtype='<U4')],\n",
       "        [array(['empirical'], dtype='<U9')],\n",
       "        [array(['increasing'], dtype='<U10')],\n",
       "        [array(['solve'], dtype='<U5')],\n",
       "        [array(['path'], dtype='<U4')],\n",
       "        [array(['global'], dtype='<U6')],\n",
       "        [array(['step'], dtype='<U4')],\n",
       "        [array(['='], dtype='<U1')],\n",
       "        [array(['improvements'], dtype='<U12')],\n",
       "        [array(['decision'], dtype='<U8')],\n",
       "        [array(['generate'], dtype='<U8')],\n",
       "        [array(['impact'], dtype='<U6')],\n",
       "        [array(['making'], dtype='<U6')],\n",
       "        [array(['sequence'], dtype='<U8')],\n",
       "        [array(['indexing'], dtype='<U8')],\n",
       "        [array(['software'], dtype='<U8')],\n",
       "        [array(['build'], dtype='<U5')],\n",
       "        [array(['building'], dtype='<U8')],\n",
       "        [array(['ability'], dtype='<U7')],\n",
       "        [array(['studied'], dtype='<U7')],\n",
       "        [array(['increase'], dtype='<U8')],\n",
       "        [array(['relationships'], dtype='<U13')],\n",
       "        [array(['traffic'], dtype='<U7')],\n",
       "        [array(['degree'], dtype='<U6')],\n",
       "        [array(['alternative'], dtype='<U11')],\n",
       "        [array(['domains'], dtype='<U7')],\n",
       "        [array(['error'], dtype='<U5')],\n",
       "        [array(['+'], dtype='<U1')],\n",
       "        [array(['languages'], dtype='<U9')],\n",
       "        [array(['tools'], dtype='<U5')],\n",
       "        [array(['extend'], dtype='<U6')],\n",
       "        [array(['fundamental'], dtype='<U11')],\n",
       "        [array(['construction'], dtype='<U12')],\n",
       "        [array(['construct'], dtype='<U9')],\n",
       "        [array(['help'], dtype='<U4')],\n",
       "        [array(['achieved'], dtype='<U8')],\n",
       "        [array(['strategies'], dtype='<U10')],\n",
       "        [array(['introduced'], dtype='<U10')],\n",
       "        [array(['notion'], dtype='<U6')],\n",
       "        [array(['challenges'], dtype='<U10')],\n",
       "        [array(['total'], dtype='<U5')],\n",
       "        [array(['routing'], dtype='<U7')],\n",
       "        [array(['length'], dtype='<U6')],\n",
       "        [array(['fact'], dtype='<U4')],\n",
       "        [array(['answer'], dtype='<U6')],\n",
       "        [array(['setting'], dtype='<U7')],\n",
       "        [array(['open'], dtype='<U4')],\n",
       "        [array(['rules'], dtype='<U5')],\n",
       "        [array(['basic'], dtype='<U5')],\n",
       "        [array(['years'], dtype='<U5')],\n",
       "        [array(['tool'], dtype='<U4')],\n",
       "        [array(['wide'], dtype='<U4')],\n",
       "        [array(['combination'], dtype='<U11')],\n",
       "        [array(['view'], dtype='<U4')],\n",
       "        [array(['fixed'], dtype='<U5')],\n",
       "        [array(['typically'], dtype='<U9')],\n",
       "        [array(['providing'], dtype='<U9')],\n",
       "        [array(['explore'], dtype='<U7')],\n",
       "        [array(['respect'], dtype='<U7')],\n",
       "        [array(['schemes'], dtype='<U7')],\n",
       "        [array(['minimum'], dtype='<U7')],\n",
       "        [array(['additional'], dtype='<U10')],\n",
       "        [array(['expected'], dtype='<U8')],\n",
       "        [array(['special'], dtype='<U7')],\n",
       "        [array(['running'], dtype='<U7')],\n",
       "        [array(['examples'], dtype='<U8')],\n",
       "        [array(['enables'], dtype='<U7')],\n",
       "        [array(['automatic'], dtype='<U9')],\n",
       "        [array(['directly'], dtype='<U8')],\n",
       "        [array(['prototype'], dtype='<U9')],\n",
       "        [array(['link'], dtype='<U4')],\n",
       "        [array(['higher'], dtype='<U6')],\n",
       "        [array(['scale'], dtype='<U5')],\n",
       "        [array(['precision'], dtype='<U9')],\n",
       "        [array(['theory'], dtype='<U6')],\n",
       "        [array(['complete'], dtype='<U8')],\n",
       "        [array(['state-of-the-art'], dtype='<U16')],\n",
       "        [array(['power'], dtype='<U5')]], dtype=object),\n",
       " 'CNormnnPvsT': <12499x767 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 185911 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormnnPvsT': <12499x767 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 185911 stored elements in Compressed Sparse Column format>,\n",
       " 'PvsP': <12499x12499 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 30789 stored elements in Compressed Sparse Column format>,\n",
       " 'CNormPvsP': <12499x12499 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 30789 stored elements in Compressed Sparse Column format>,\n",
       " 'RNormPvsP': <12499x12499 sparse matrix of type '<class 'numpy.float64'>'\n",
       " \twith 30789 stored elements in Compressed Sparse Column format>}"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mat_file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "paper_conf = mat_file['PvsC'].nonzero()[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group papers by research area from their conference column in PvsC\n",
    "# (0 = KDD, 1 = SIGMOD, 9 = SIGCOMM, 10 = MobiCom, 13 = VLDB).\n",
    "SAMPLE_SEED = 0      # fixed seed: the subsample is identical on every Restart & Run All\n",
    "N_DB_PAPERS = 994    # number of database papers kept after subsampling\n",
    "\n",
    "# DataBase (SIGMOD, VLDB), subsampled without replacement\n",
    "paper_db = np.isin(paper_conf,[1,13])\n",
    "db_sampler = np.random.RandomState(SAMPLE_SEED)\n",
    "paper_db_idx = np.sort(db_sampler.choice(np.flatnonzero(paper_db),N_DB_PAPERS,replace=False))\n",
    "# Data Mining (KDD)\n",
    "paper_dm = np.isin(paper_conf,[0])\n",
    "paper_dm_idx = np.flatnonzero(paper_dm)\n",
    "# Wireless Communication (SIGCOMM, MobiCom)\n",
    "paper_wc = np.isin(paper_conf,[9,10])\n",
    "paper_wc_idx = np.flatnonzero(paper_wc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Union of the three per-area index arrays, in ascending paper order\n",
    "paper_idx = np.sort(np.concatenate((paper_db_idx, paper_dm_idx, paper_wc_idx)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3025"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(paper_idx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 0 : database, 1: wireless communication, 2: data mining\n",
    "# Vectorised membership tests replace the per-paper `idx in array` scans.\n",
    "# np.select uses the first condition that holds, so the precedence is\n",
    "# database, then wireless communication, and everything else is data mining.\n",
    "paper_target = np.select(\n",
    "    [np.isin(paper_idx, paper_db_idx), np.isin(paper_idx, paper_wc_idx)],\n",
    "    [0, 1],\n",
    "    default=2,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3025,)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "paper_target.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Edges (PAP, PSP)\n",
    "Conference column indices `[0, 1, 9, 10, 13]` in `PvsC` correspond to KDD, SIGMOD, SIGCOMM, MobiCom, VLDB."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<12499x17431 sparse matrix of type '<class 'numpy.float64'>'\n",
       "\twith 37055 stored elements in Compressed Sparse Column format>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mat_file['PvsA']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give every author of a selected paper a node id placed directly after the\n",
    "# paper ids (0 .. len(paper_idx)-1), numbered in order of first appearance.\n",
    "authors = mat_file['PvsA'][paper_idx].nonzero()[1]\n",
    "author_dic = {}\n",
    "author_offset = len(paper_idx)\n",
    "# setdefault evaluates the candidate id before inserting, so a new author\n",
    "# receives offset + (number of authors seen so far)\n",
    "re_authors = np.array(\n",
    "    [author_dic.setdefault(a, author_offset + len(author_dic)) for a in authors]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5915"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(author_dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Subject node ids follow the paper and author ids, numbered in order of first appearance.\n",
    "subjects = mat_file['PvsL'][paper_idx].nonzero()[1]\n",
    "subject_dic = {}\n",
    "subject_offset = len(paper_idx) + len(author_dic)\n",
    "# setdefault evaluates the candidate id before inserting, so a new subject\n",
    "# receives offset + (number of subjects seen so far)\n",
    "re_subjects = np.array(\n",
    "    [subject_dic.setdefault(s, subject_offset + len(subject_dic)) for s in subjects]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "56"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(subject_dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "node_num = len(paper_idx) + len(author_dic) + len(subject_dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8996"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "node_num"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "papers = mat_file['PvsA'][paper_idx].nonzero()[0]\n",
    "data = np.ones_like(papers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "A_pa = csr_matrix((data, (papers, re_authors)), shape=(node_num,node_num))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<8996x8996 sparse matrix of type '<class 'numpy.int32'>'\n",
       "\twith 10001 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A_pa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "papers = mat_file['PvsL'][paper_idx].nonzero()[0]\n",
    "data = np.ones_like(papers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "A_ps = csr_matrix((data, (papers, re_subjects)), shape=(node_num,node_num))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<8996x8996 sparse matrix of type '<class 'numpy.int32'>'\n",
       "\twith 3025 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A_ps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "A_ap = A_pa.transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "A_sp = A_ps.transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "edges = [A_pa,A_ap,A_ps,A_sp]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Node Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Term ids follow the paper, author and subject ids, numbered in order of first appearance.\n",
    "terms = mat_file['TvsP'].transpose()[paper_idx].nonzero()[1]\n",
    "term_dic = {}\n",
    "term_offset = len(paper_idx) + len(author_dic) + len(subject_dic)\n",
    "# setdefault evaluates the candidate id before inserting, so a new term\n",
    "# receives offset + (number of terms seen so far)\n",
    "re_terms = np.array(\n",
    "    [term_dic.setdefault(t, term_offset + len(term_dic)) for t in terms]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<12499x1903 sparse matrix of type '<class 'numpy.float64'>'\n",
       "\twith 972973 stored elements in Compressed Sparse Row format>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mat_file['TvsP'].transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Temporary adjacency matrices on an enlarged index space\n",
    "# (papers, authors, subjects, then terms); the feature cell below slices them.\n",
    "tmp_num_node = node_num + len(term_dic)\n",
    "papers = mat_file['PvsA'][paper_idx].nonzero()[0]\n",
    "data = np.ones_like(papers)\n",
    "A_pa_tmp = csr_matrix((data, (papers, re_authors)), shape=(tmp_num_node,tmp_num_node))\n",
    "papers = mat_file['PvsL'][paper_idx].nonzero()[0]\n",
    "data = np.ones_like(papers)\n",
    "A_ps_tmp = csr_matrix((data, (papers, re_subjects)), shape=(tmp_num_node,tmp_num_node))\n",
    "# re_terms was built from TvsP transposed, so take the paper rows from that same\n",
    "# matrix: row and column indices are then guaranteed to pair up entry by entry,\n",
    "# instead of relying on PvsT storing the same non-zeros in the same order.\n",
    "papers = mat_file['TvsP'].transpose()[paper_idx].nonzero()[0]\n",
    "data = np.ones_like(papers)\n",
    "A_pt_tmp = csr_matrix((data, (papers, re_terms)), shape=(tmp_num_node,tmp_num_node))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binary bag-of-terms features. The last len(term_dic) columns are the term nodes.\n",
    "# A paper has a term if it is linked to it; an author / subject has a term if\n",
    "# any of its papers does (transpose(A_pa) . A_pt and transpose(A_ps) . A_pt).\n",
    "# dtype=int: the np.int alias was deprecated in NumPy 1.20 and removed in 1.24.\n",
    "paper_feat = np.array(A_pt_tmp[:len(paper_idx),-len(term_dic):].toarray()>0, dtype=int)\n",
    "author_feat = np.array(A_pa_tmp.transpose().dot(A_pt_tmp)[len(paper_idx):len(paper_idx)+len(author_dic),-len(term_dic):].toarray()>0, dtype=int)\n",
    "subject_feat = np.array(A_ps_tmp.transpose().dot(A_pt_tmp)[len(paper_idx)+len(author_dic):len(paper_idx)+len(author_dic)+len(subject_dic),-len(term_dic):].toarray()>0, dtype=int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the feature rows in node-id order: papers, then authors, then subjects\n",
    "node_feature = np.concatenate((paper_feat,author_feat,subject_feat))\n",
    "# Keep the original misspelled name as an alias so cells that reference it still run\n",
    "node_faeture = node_feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8996, 1902)"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "node_faeture.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3025,)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "paper_target.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train, Valid, Test\n",
    "# Each *_label array has one row per paper: [index into paper_idx, class].\n",
    "SPLIT_SEED = 0            # fixed seed: the split is identical on every Restart & Run All\n",
    "N_PER_CLASS = 300         # papers drawn per class for train + valid\n",
    "N_TRAIN_PER_CLASS = 200   # first 200 of each draw go to train, the remaining 100 to valid\n",
    "split_rng = np.random.RandomState(SPLIT_SEED)\n",
    "\n",
    "train_valid_DB = list(split_rng.choice(np.where(paper_target==0)[0],N_PER_CLASS, replace=False))\n",
    "train_valid_WC = list(split_rng.choice(np.where(paper_target==1)[0],N_PER_CLASS, replace=False))\n",
    "train_valid_DM = list(split_rng.choice(np.where(paper_target==2)[0],N_PER_CLASS, replace=False))\n",
    "\n",
    "train_idx = np.array(train_valid_DB[:N_TRAIN_PER_CLASS] + train_valid_WC[:N_TRAIN_PER_CLASS] + train_valid_DM[:N_TRAIN_PER_CLASS])\n",
    "train_target = paper_target[train_idx]\n",
    "train_label = np.vstack((train_idx,train_target)).transpose()\n",
    "valid_idx = np.array(train_valid_DB[N_TRAIN_PER_CLASS:] + train_valid_WC[N_TRAIN_PER_CLASS:] + train_valid_DM[N_TRAIN_PER_CLASS:])\n",
    "valid_target = paper_target[valid_idx]\n",
    "valid_label = np.vstack((valid_idx,valid_target)).transpose()\n",
    "# Every paper not drawn above is test. setdiff1d returns the indices sorted,\n",
    "# whereas iterating a Python set gives no ordering guarantee.\n",
    "test_idx = np.setdiff1d(np.arange(paper_target.shape[0]), np.concatenate((train_idx, valid_idx)))\n",
    "test_target = paper_target[test_idx]\n",
    "test_label = np.vstack((test_idx,test_target)).transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = [train_label,valid_label,test_label]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([[1595,    0],\n",
       "        [1782,    0],\n",
       "        [1145,    0],\n",
       "        ...,\n",
       "        [ 618,    2],\n",
       "        [ 207,    2],\n",
       "        [ 373,    2]]), array([[ 933,    0],\n",
       "        [1582,    0],\n",
       "        [1470,    0],\n",
       "        [1899,    0],\n",
       "        [ 873,    0],\n",
       "        [1925,    0],\n",
       "        [1831,    0],\n",
       "        [1857,    0],\n",
       "        [1057,    0],\n",
       "        [1040,    0],\n",
       "        [1027,    0],\n",
       "        [ 801,    0],\n",
       "        [1921,    0],\n",
       "        [1268,    0],\n",
       "        [1728,    0],\n",
       "        [1416,    0],\n",
       "        [1732,    0],\n",
       "        [1717,    0],\n",
       "        [ 984,    0],\n",
       "        [1939,    0],\n",
       "        [1674,    0],\n",
       "        [ 866,    0],\n",
       "        [1874,    0],\n",
       "        [1254,    0],\n",
       "        [1761,    0],\n",
       "        [1735,    0],\n",
       "        [1098,    0],\n",
       "        [1256,    0],\n",
       "        [1500,    0],\n",
       "        [1496,    0],\n",
       "        [1133,    0],\n",
       "        [ 973,    0],\n",
       "        [2370,    0],\n",
       "        [1699,    0],\n",
       "        [ 941,    0],\n",
       "        [2353,    0],\n",
       "        [1151,    0],\n",
       "        [1541,    0],\n",
       "        [1090,    0],\n",
       "        [1118,    0],\n",
       "        [ 943,    0],\n",
       "        [ 928,    0],\n",
       "        [1931,    0],\n",
       "        [1062,    0],\n",
       "        [1483,    0],\n",
       "        [ 843,    0],\n",
       "        [ 944,    0],\n",
       "        [ 932,    0],\n",
       "        [1410,    0],\n",
       "        [1417,    0],\n",
       "        [1845,    0],\n",
       "        [1374,    0],\n",
       "        [1206,    0],\n",
       "        [1577,    0],\n",
       "        [1146,    0],\n",
       "        [1395,    0],\n",
       "        [1743,    0],\n",
       "        [1401,    0],\n",
       "        [1079,    0],\n",
       "        [1170,    0],\n",
       "        [ 806,    0],\n",
       "        [2364,    0],\n",
       "        [1833,    0],\n",
       "        [1856,    0],\n",
       "        [1161,    0],\n",
       "        [2366,    0],\n",
       "        [1075,    0],\n",
       "        [ 859,    0],\n",
       "        [1092,    0],\n",
       "        [1682,    0],\n",
       "        [1788,    0],\n",
       "        [ 861,    0],\n",
       "        [1480,    0],\n",
       "        [1138,    0],\n",
       "        [1886,    0],\n",
       "        [1093,    0],\n",
       "        [1713,    0],\n",
       "        [ 816,    0],\n",
       "        [ 808,    0],\n",
       "        [2389,    0],\n",
       "        [2369,    0],\n",
       "        [2349,    0],\n",
       "        [1560,    0],\n",
       "        [ 867,    0],\n",
       "        [1261,    0],\n",
       "        [1673,    0],\n",
       "        [ 967,    0],\n",
       "        [1384,    0],\n",
       "        [1013,    0],\n",
       "        [1636,    0],\n",
       "        [1485,    0],\n",
       "        [2403,    0],\n",
       "        [1167,    0],\n",
       "        [1714,    0],\n",
       "        [1823,    0],\n",
       "        [1120,    0],\n",
       "        [1606,    0],\n",
       "        [1000,    0],\n",
       "        [1832,    0],\n",
       "        [1904,    0],\n",
       "        [1236,    1],\n",
       "        [2897,    1],\n",
       "        [1364,    1],\n",
       "        [ 446,    1],\n",
       "        [2206,    1],\n",
       "        [1424,    1],\n",
       "        [ 497,    1],\n",
       "        [2324,    1],\n",
       "        [2013,    1],\n",
       "        [1229,    1],\n",
       "        [2860,    1],\n",
       "        [2189,    1],\n",
       "        [ 322,    1],\n",
       "        [2037,    1],\n",
       "        [2434,    1],\n",
       "        [2160,    1],\n",
       "        [2445,    1],\n",
       "        [1955,    1],\n",
       "        [2002,    1],\n",
       "        [ 316,    1],\n",
       "        [1450,    1],\n",
       "        [ 442,    1],\n",
       "        [1365,    1],\n",
       "        [2306,    1],\n",
       "        [1186,    1],\n",
       "        [1320,    1],\n",
       "        [1298,    1],\n",
       "        [2880,    1],\n",
       "        [ 145,    1],\n",
       "        [ 295,    1],\n",
       "        [ 315,    1],\n",
       "        [2870,    1],\n",
       "        [2075,    1],\n",
       "        [2054,    1],\n",
       "        [1516,    1],\n",
       "        [2226,    1],\n",
       "        [2681,    1],\n",
       "        [2187,    1],\n",
       "        [ 456,    1],\n",
       "        [2052,    1],\n",
       "        [ 325,    1],\n",
       "        [2284,    1],\n",
       "        [1538,    1],\n",
       "        [2856,    1],\n",
       "        [2850,    1],\n",
       "        [2015,    1],\n",
       "        [2238,    1],\n",
       "        [2441,    1],\n",
       "        [1240,    1],\n",
       "        [2566,    1],\n",
       "        [2908,    1],\n",
       "        [2123,    1],\n",
       "        [1999,    1],\n",
       "        [1982,    1],\n",
       "        [2404,    1],\n",
       "        [1539,    1],\n",
       "        [1998,    1],\n",
       "        [2887,    1],\n",
       "        [ 132,    1],\n",
       "        [2090,    1],\n",
       "        [1965,    1],\n",
       "        [2893,    1],\n",
       "        [2676,    1],\n",
       "        [2443,    1],\n",
       "        [2433,    1],\n",
       "        [2565,    1],\n",
       "        [2177,    1],\n",
       "        [1187,    1],\n",
       "        [1290,    1],\n",
       "        [2047,    1],\n",
       "        [ 303,    1],\n",
       "        [2287,    1],\n",
       "        [2451,    1],\n",
       "        [2199,    1],\n",
       "        [2276,    1],\n",
       "        [2414,    1],\n",
       "        [ 508,    1],\n",
       "        [1530,    1],\n",
       "        [1537,    1],\n",
       "        [2011,    1],\n",
       "        [2271,    1],\n",
       "        [2247,    1],\n",
       "        [2260,    1],\n",
       "        [2774,    1],\n",
       "        [ 654,    1],\n",
       "        [1984,    1],\n",
       "        [ 444,    1],\n",
       "        [1438,    1],\n",
       "        [2567,    1],\n",
       "        [2091,    1],\n",
       "        [2205,    1],\n",
       "        [2257,    1],\n",
       "        [1532,    1],\n",
       "        [ 674,    1],\n",
       "        [1191,    1],\n",
       "        [1427,    1],\n",
       "        [1993,    1],\n",
       "        [1420,    1],\n",
       "        [2868,    1],\n",
       "        [2237,    1],\n",
       "        [ 397,    2],\n",
       "        [2477,    2],\n",
       "        [2985,    2],\n",
       "        [ 567,    2],\n",
       "        [  17,    2],\n",
       "        [2636,    2],\n",
       "        [2586,    2],\n",
       "        [2649,    2],\n",
       "        [2497,    2],\n",
       "        [2475,    2],\n",
       "        [2689,    2],\n",
       "        [ 268,    2],\n",
       "        [2614,    2],\n",
       "        [2514,    2],\n",
       "        [ 262,    2],\n",
       "        [2525,    2],\n",
       "        [ 605,    2],\n",
       "        [2484,    2],\n",
       "        [2929,    2],\n",
       "        [2601,    2],\n",
       "        [ 266,    2],\n",
       "        [2795,    2],\n",
       "        [ 579,    2],\n",
       "        [  69,    2],\n",
       "        [ 782,    2],\n",
       "        [  99,    2],\n",
       "        [ 110,    2],\n",
       "        [ 350,    2],\n",
       "        [ 550,    2],\n",
       "        [2505,    2],\n",
       "        [2533,    2],\n",
       "        [ 429,    2],\n",
       "        [2812,    2],\n",
       "        [ 370,    2],\n",
       "        [2633,    2],\n",
       "        [ 530,    2],\n",
       "        [2663,    2],\n",
       "        [ 260,    2],\n",
       "        [ 624,    2],\n",
       "        [ 180,    2],\n",
       "        [ 102,    2],\n",
       "        [2506,    2],\n",
       "        [ 777,    2],\n",
       "        [2479,    2],\n",
       "        [2515,    2],\n",
       "        [2591,    2],\n",
       "        [ 571,    2],\n",
       "        [2724,    2],\n",
       "        [ 240,    2],\n",
       "        [2941,    2],\n",
       "        [ 617,    2],\n",
       "        [2806,    2],\n",
       "        [ 186,    2],\n",
       "        [ 745,    2],\n",
       "        [2698,    2],\n",
       "        [ 214,    2],\n",
       "        [ 547,    2],\n",
       "        [ 545,    2],\n",
       "        [2732,    2],\n",
       "        [  64,    2],\n",
       "        [2639,    2],\n",
       "        [2749,    2],\n",
       "        [2495,    2],\n",
       "        [2968,    2],\n",
       "        [ 285,    2],\n",
       "        [3006,    2],\n",
       "        [  85,    2],\n",
       "        [ 723,    2],\n",
       "        [2787,    2],\n",
       "        [ 339,    2],\n",
       "        [ 286,    2],\n",
       "        [  81,    2],\n",
       "        [2478,    2],\n",
       "        [ 750,    2],\n",
       "        [2927,    2],\n",
       "        [2975,    2],\n",
       "        [ 749,    2],\n",
       "        [ 271,    2],\n",
       "        [2925,    2],\n",
       "        [2983,    2],\n",
       "        [ 395,    2],\n",
       "        [2571,    2],\n",
       "        [2700,    2],\n",
       "        [ 716,    2],\n",
       "        [2647,    2],\n",
       "        [  28,    2],\n",
       "        [3011,    2],\n",
       "        [3017,    2],\n",
       "        [2536,    2],\n",
       "        [ 364,    2],\n",
       "        [2835,    2],\n",
       "        [2833,    2],\n",
       "        [2789,    2],\n",
       "        [ 411,    2],\n",
       "        [2627,    2],\n",
       "        [2664,    2],\n",
       "        [2578,    2],\n",
       "        [2751,    2],\n",
       "        [ 273,    2],\n",
       "        [ 636,    2]]), array([[   0,    2],\n",
       "        [   1,    2],\n",
       "        [   2,    2],\n",
       "        ...,\n",
       "        [3021,    2],\n",
       "        [3023,    2],\n",
       "        [3024,    2]])]"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "labels"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
